diff --git a/README.md b/README.md --- a/README.md +++ b/README.md @@ -102,8 +102,9 @@ * Running a serial build will be **slow**. To improve speed, try running a parallel build. That's done by default in Ninja; for ``make``, use the option - ``-j NNN``, where ``NNN`` is the number of parallel jobs, e.g. the number of - CPUs you have. + ``-j NNN``, where ``NNN`` is the number of parallel jobs to run. + In most cases, you get the best performance if you specify the number of CPU threads you have. + On some Unix systems, you can specify this with ``-j$(nproc)``. * For more information see [CMake](https://llvm.org/docs/CMake.html) diff --git a/clang-tools-extra/docs/clang-tidy/Contributing.rst b/clang-tools-extra/docs/clang-tidy/Contributing.rst --- a/clang-tools-extra/docs/clang-tidy/Contributing.rst +++ b/clang-tools-extra/docs/clang-tidy/Contributing.rst @@ -364,6 +364,11 @@ test. The ``ASTMatchersTests`` target contains unit tests for the public AST matcher classes and is a good source of testing idioms for matchers. +You can build the Clang-tidy unit tests by building the ``ClangTidyTests`` target. +Test targets in LLVM and Clang are excluded from the "build all" style action of +IDE-based CMake generators, so you need to explicitly build the target for the unit +tests to be built. + Making your check robust ^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/docs/ClangFormattedStatus.rst b/clang/docs/ClangFormattedStatus.rst --- a/clang/docs/ClangFormattedStatus.rst +++ b/clang/docs/ClangFormattedStatus.rst @@ -17,7 +17,7 @@ ====================== :doc:`ClangFormattedStatus` describes the state of LLVM source -tree in terms of conformance to :doc:`ClangFormat` as of: January 03, 2022 11:33:59 (`cd2b050fa499 <https://github.com/llvm/llvm-project/commit/cd2b050fa499>`_). +tree in terms of conformance to :doc:`ClangFormat` as of: March 06, 2022 17:32:26 (`830ba4cebe79 <https://github.com/llvm/llvm-project/commit/830ba4cebe79>`_). .. 
list-table:: LLVM Clang-Format Status @@ -29,6 +29,106 @@ - Formatted Files - Unformatted Files - % Complete + * - bolt/include/bolt/Core + - `15` + - `10` + - `5` + - :part:`66%` + * - bolt/include/bolt/Passes + - `47` + - `47` + - `0` + - :good:`100%` + * - bolt/include/bolt/Profile + - `8` + - `8` + - `0` + - :good:`100%` + * - bolt/include/bolt/Rewrite + - `5` + - `4` + - `1` + - :part:`80%` + * - bolt/include/bolt/RuntimeLibs + - `3` + - `3` + - `0` + - :good:`100%` + * - bolt/include/bolt/Utils + - `4` + - `4` + - `0` + - :good:`100%` + * - bolt/lib/Core + - `14` + - `5` + - `9` + - :part:`35%` + * - bolt/lib/Passes + - `45` + - `21` + - `24` + - :part:`46%` + * - bolt/lib/Profile + - `7` + - `3` + - `4` + - :part:`42%` + * - bolt/lib/Rewrite + - `6` + - `0` + - `6` + - :none:`0%` + * - bolt/lib/RuntimeLibs + - `3` + - `3` + - `0` + - :good:`100%` + * - bolt/lib/Target/AArch64 + - `1` + - `0` + - `1` + - :none:`0%` + * - bolt/lib/Target/X86 + - `1` + - `0` + - `1` + - :none:`0%` + * - bolt/lib/Utils + - `2` + - `1` + - `1` + - :part:`50%` + * - bolt/runtime + - `3` + - `0` + - `3` + - :none:`0%` + * - bolt/tools/driver + - `1` + - `0` + - `1` + - :none:`0%` + * - bolt/tools/heatmap + - `1` + - `1` + - `0` + - :good:`100%` + * - bolt/tools/llvm-bolt-fuzzer + - `1` + - `1` + - `0` + - :good:`100%` + * - bolt/tools/merge-fdata + - `1` + - `0` + - `1` + - :none:`0%` + * - bolt/unittests/Core + - `1` + - `1` + - `0` + - :good:`100%` * - clang/bindings/python/tests/cindex/INPUTS - `5` - `3` @@ -80,10 +180,10 @@ - `2` - :none:`0%` * - clang/include/clang/Analysis/FlowSensitive - - `7` - - `6` + - `16` + - `15` - `1` - - :part:`85%` + - :part:`93%` * - clang/include/clang/Analysis/Support - `1` - `0` @@ -251,12 +351,12 @@ - :none:`0%` * - clang/include/clang/Tooling/DependencyScanning - `5` - - `4` - - `1` - - :part:`80%` + - `5` + - `0` + - :good:`100%` * - clang/include/clang/Tooling/Inclusions - - `2` - - `2` + - `3` + - `3` - `0` - :good:`100%` * - 
clang/include/clang/Tooling/Refactoring @@ -279,6 +379,11 @@ - `5` - `0` - :good:`100%` + * - clang/include/clang/Tooling/Syntax/Pseudo + - `5` + - `5` + - `0` + - :good:`100%` * - clang/include/clang/Tooling/Transformer - `8` - `6` @@ -296,12 +401,12 @@ - :none:`0%` * - clang/lib/Analysis - `28` - - `4` - - `24` - - :part:`14%` + - `3` + - `25` + - :part:`10%` * - clang/lib/Analysis/FlowSensitive - - `2` - - `2` + - `7` + - `7` - `0` - :good:`100%` * - clang/lib/Analysis/plugins/CheckerDependencyHandling @@ -361,9 +466,9 @@ - :part:`50%` * - clang/lib/CodeGen - `87` - - `10` - - `77` - - :part:`11%` + - `9` + - `78` + - :part:`10%` * - clang/lib/CrossTU - `1` - `0` @@ -400,9 +505,9 @@ - `12` - :part:`14%` * - clang/lib/Driver/ToolChains - - `95` + - `94` - `41` - - `54` + - `53` - :part:`43%` * - clang/lib/Driver/ToolChains/Arch - `20` @@ -415,15 +520,15 @@ - `3` - :none:`0%` * - clang/lib/Format - - `33` - - `33` + - `35` + - `35` - `0` - :good:`100%` * - clang/lib/Frontend - `32` - - `3` - - `29` - - :part:`9%` + - `4` + - `28` + - :part:`12%` * - clang/lib/Frontend/Rewrite - `8` - `0` @@ -436,14 +541,14 @@ - :none:`0%` * - clang/lib/Headers - `146` - - `16` - - `130` - - :part:`10%` + - `14` + - `132` + - :part:`9%` * - clang/lib/Headers/openmp_wrappers - `5` - - `5` - - `0` - - :good:`100%` + - `4` + - `1` + - :part:`80%` * - clang/lib/Headers/ppc_wrappers - `7` - `2` @@ -465,9 +570,9 @@ - `0` - :good:`100%` * - clang/lib/Lex - - `23` + - `24` - `1` - - `22` + - `23` - :part:`4%` * - clang/lib/Parse - `15` @@ -481,19 +586,19 @@ - :none:`0%` * - clang/lib/Sema - `55` - - `5` - - `50` - - :part:`9%` + - `4` + - `51` + - :part:`7%` * - clang/lib/Serialization - `17` - `2` - `15` - :part:`11%` * - clang/lib/StaticAnalyzer/Checkers - - `118` - - `16` - - `102` - - :part:`13%` + - `122` + - `19` + - `103` + - :part:`15%` * - clang/lib/StaticAnalyzer/Checkers/cert - `2` - `2` @@ -551,17 +656,17 @@ - :none:`0%` * - clang/lib/Tooling/DependencyScanning - `5` - - `3` - 
- `2` - - :part:`60%` + - `4` + - `1` + - :part:`80%` * - clang/lib/Tooling/DumpTool - `4` - `3` - `1` - :part:`75%` * - clang/lib/Tooling/Inclusions - - `2` - - `2` + - `3` + - `3` - `0` - :good:`100%` * - clang/lib/Tooling/Refactoring @@ -584,6 +689,11 @@ - `6` - `1` - :part:`85%` + * - clang/lib/Tooling/Syntax/Pseudo + - `8` + - `8` + - `0` + - :good:`100%` * - clang/lib/Tooling/Transformer - `7` - `4` @@ -669,6 +779,11 @@ - `0` - `1` - :none:`0%` + * - clang/tools/clang-linker-wrapper + - `3` + - `2` + - `1` + - :part:`66%` * - clang/tools/clang-nvlink-wrapper - `1` - `1` @@ -684,6 +799,11 @@ - `1` - `0` - :good:`100%` + * - clang/tools/clang-pseudo + - `1` + - `1` + - `0` + - :good:`100%` * - clang/tools/clang-refactor - `4` - `4` @@ -735,10 +855,10 @@ - `4` - :part:`33%` * - clang/unittests/Analysis/FlowSensitive - - `5` - - `5` - - `0` - - :good:`100%` + - `14` + - `13` + - `1` + - :part:`92%` * - clang/unittests/AST - `30` - `8` @@ -780,8 +900,8 @@ - `4` - :part:`20%` * - clang/unittests/Format - - `23` - - `23` + - `24` + - `24` - `0` - :good:`100%` * - clang/unittests/Frontend @@ -810,10 +930,10 @@ - `1` - :none:`0%` * - clang/unittests/Lex - - `7` - - `3` + - `8` - `4` - - :part:`42%` + - `4` + - :part:`50%` * - clang/unittests/libclang - `2` - `0` @@ -850,10 +970,10 @@ - `9` - :part:`43%` * - clang/unittests/Tooling - - `29` - - `8` - - `21` - - :part:`27%` + - `30` + - `10` + - `20` + - :part:`33%` * - clang/unittests/Tooling/RecursiveASTVisitorTests - `30` - `12` @@ -864,6 +984,11 @@ - `3` - `4` - :part:`42%` + * - clang/unittests/Tooling/Syntax/Pseudo + - `4` + - `4` + - `0` + - :good:`100%` * - clang/utils/perf-training/cxx - `1` - `0` @@ -965,10 +1090,10 @@ - `1` - :none:`0%` * - clang-tools-extra/clang-tidy - - `18` - - `12` + - `20` + - `14` - `6` - - :part:`66%` + - :part:`70%` * - clang-tools-extra/clang-tidy/abseil - `42` - `31` @@ -990,10 +1115,10 @@ - `0` - :good:`100%` * - clang-tools-extra/clang-tidy/bugprone - - `121` - - `101` - - `20` - 
- :part:`83%` + - `125` + - `106` + - `19` + - :part:`84%` * - clang-tools-extra/clang-tidy/cert - `29` - `28` @@ -1006,9 +1131,9 @@ - :part:`80%` * - clang-tools-extra/clang-tidy/cppcoreguidelines - `45` - - `41` - - `4` - - :part:`91%` + - `42` + - `3` + - :part:`93%` * - clang-tools-extra/clang-tidy/darwin - `5` - `2` @@ -1045,15 +1170,15 @@ - `0` - :good:`100%` * - clang-tools-extra/clang-tidy/misc - - `31` - - `28` + - `33` + - `30` - `3` - :part:`90%` * - clang-tools-extra/clang-tidy/modernize - `67` - - `49` - - `18` - - :part:`73%` + - `48` + - `19` + - :part:`71%` * - clang-tools-extra/clang-tidy/mpi - `5` - `5` @@ -1085,10 +1210,10 @@ - `2` - :part:`60%` * - clang-tools-extra/clang-tidy/readability - - `83` - - `70` - - `13` - - :part:`84%` + - `88` + - `76` + - `12` + - :part:`86%` * - clang-tools-extra/clang-tidy/tool - `3` - `2` @@ -1106,9 +1231,9 @@ - :good:`100%` * - clang-tools-extra/clangd - `97` - - `83` - - `14` - - :part:`85%` + - `81` + - `16` + - :part:`83%` * - clang-tools-extra/clangd/benchmarks - `1` - `1` @@ -1126,14 +1251,14 @@ - :good:`100%` * - clang-tools-extra/clangd/index - `39` - - `37` - - `2` - - :part:`94%` + - `36` + - `3` + - :part:`92%` * - clang-tools-extra/clangd/index/dex - `9` - - `8` - - `1` - - :part:`88%` + - `7` + - `2` + - :part:`77%` * - clang-tools-extra/clangd/index/dex/dexp - `1` - `1` @@ -1170,10 +1295,10 @@ - `0` - :good:`100%` * - clang-tools-extra/clangd/refactor - - `4` - - `3` + - `6` + - `5` - `1` - - :part:`75%` + - :part:`83%` * - clang-tools-extra/clangd/refactor/tweaks - `14` - `10` @@ -1190,8 +1315,8 @@ - `0` - :good:`100%` * - clang-tools-extra/clangd/unittests - - `78` - - `65` + - `79` + - `66` - `13` - :part:`83%` * - clang-tools-extra/clangd/unittests/decision_forest_model @@ -1211,9 +1336,9 @@ - :good:`100%` * - clang-tools-extra/clangd/unittests/tweaks - `20` - - `20` - - `0` - - :good:`100%` + - `19` + - `1` + - :part:`95%` * - clang-tools-extra/clangd/unittests/xpc - `1` - `1` @@ -1286,9 
+1411,9 @@ - :none:`0%` * - clang-tools-extra/unittests/clang-tidy - `16` - - `8` - - `8` - - :part:`50%` + - `9` + - `7` + - :part:`56%` * - clang-tools-extra/unittests/include/common - `1` - `0` @@ -1310,10 +1435,10 @@ - `1` - :part:`66%` * - compiler-rt/lib/asan - - `56` - - `4` + - `57` + - `5` - `52` - - :part:`7%` + - :part:`8%` * - compiler-rt/lib/asan/tests - `17` - `1` @@ -1346,9 +1471,9 @@ - :none:`0%` * - compiler-rt/lib/dfsan - `14` - - `10` - - `4` - - :part:`71%` + - `9` + - `5` + - :part:`64%` * - compiler-rt/lib/fuzzer - `47` - `9` @@ -1395,10 +1520,10 @@ - `0` - :good:`100%` * - compiler-rt/lib/hwasan - - `29` - - `8` + - `30` + - `9` - `21` - - :part:`27%` + - :part:`30%` * - compiler-rt/lib/interception - `8` - `1` @@ -1415,10 +1540,10 @@ - `16` - :part:`20%` * - compiler-rt/lib/memprof - - `32` - `31` - - `1` - - :part:`96%` + - `29` + - `2` + - :part:`93%` * - compiler-rt/lib/memprof/tests - `2` - `2` @@ -1435,10 +1560,10 @@ - `4` - :none:`0%` * - compiler-rt/lib/orc - - `19` - - `14` + - `21` + - `16` - `5` - - :part:`73%` + - :part:`76%` * - compiler-rt/lib/orc/unittests - `10` - `9` @@ -1456,9 +1581,9 @@ - :part:`33%` * - compiler-rt/lib/sanitizer_common - `167` - - `28` - - `139` - - :part:`16%` + - `29` + - `138` + - :part:`17%` * - compiler-rt/lib/sanitizer_common/symbolizer - `2` - `2` @@ -1476,9 +1601,9 @@ - :none:`0%` * - compiler-rt/lib/scudo/standalone - `49` - - `49` - - `0` - - :good:`100%` + - `48` + - `1` + - :part:`97%` * - compiler-rt/lib/scudo/standalone/benchmarks - `1` - `1` @@ -1496,9 +1621,9 @@ - :good:`100%` * - compiler-rt/lib/scudo/standalone/tests - `25` - - `25` - - `0` - - :good:`100%` + - `24` + - `1` + - :part:`96%` * - compiler-rt/lib/scudo/standalone/tools - `1` - `1` @@ -1570,19 +1695,19 @@ - `0` - :good:`100%` * - cross-project-tests/debuginfo-tests/clang_llvm_roundtrip + - `2` - `1` - - `0` - `1` - - :none:`0%` + - :part:`50%` * - cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/penalty - - 
`8` + - `10` - `0` - - `8` + - `10` - :none:`0%` * - cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect - - `5` + - `7` - `0` - - `5` + - `7` - :none:`0%` * - cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_address - `7` @@ -1630,9 +1755,9 @@ - `1` - :none:`0%` * - cross-project-tests/debuginfo-tests/dexter/feature_tests/subtools/clang-opt-bisect - - `1` + - `2` - `0` - - `1` + - `2` - :none:`0%` * - cross-project-tests/debuginfo-tests/dexter-tests - `15` @@ -1654,7 +1779,7 @@ - `1` - `0` - :good:`100%` - * - flang/examples/flang-omp-report-plugin + * - flang/examples/FlangOmpReport - `3` - `3` - `0` @@ -1670,8 +1795,8 @@ - `0` - :good:`100%` * - flang/include/flang/Common - - `20` - - `20` + - `21` + - `21` - `0` - :good:`100%` * - flang/include/flang/Decimal @@ -1685,8 +1810,8 @@ - `0` - :good:`100%` * - flang/include/flang/Frontend + - `11` - `10` - - `9` - `1` - :part:`90%` * - flang/include/flang/FrontendTool @@ -1695,10 +1820,10 @@ - `0` - :good:`100%` * - flang/include/flang/Lower - - `19` - - `19` - - `0` - - :good:`100%` + - `25` + - `24` + - `1` + - :part:`96%` * - flang/include/flang/Lower/Support - `2` - `2` @@ -1710,8 +1835,8 @@ - `0` - :good:`100%` * - flang/include/flang/Optimizer/Builder/Runtime - - `8` - - `8` + - `10` + - `10` - `0` - :good:`100%` * - flang/include/flang/Optimizer/CodeGen @@ -1740,8 +1865,8 @@ - `1` - :part:`94%` * - flang/include/flang/Runtime + - `28` - `27` - - `26` - `1` - :part:`96%` * - flang/include/flang/Semantics @@ -1775,38 +1900,38 @@ - `0` - :good:`100%` * - flang/lib/Lower - - `17` - - `16` - - `1` - - :part:`94%` + - `20` + - `20` + - `0` + - :good:`100%` * - flang/lib/Optimizer/Builder - `6` - `6` - `0` - :good:`100%` * - flang/lib/Optimizer/Builder/Runtime - - `7` - - `7` + - `9` + - `9` - `0` - :good:`100%` * - flang/lib/Optimizer/CodeGen - `10` - - `9` - - `1` - - :part:`90%` + - `10` + - `0` + - :good:`100%` * - flang/lib/Optimizer/Dialect - - `4` - - 
`3` - - `1` - - :part:`75%` + - `5` + - `5` + - `0` + - :good:`100%` * - flang/lib/Optimizer/Support - - `3` - - `3` + - `4` + - `4` - `0` - :good:`100%` * - flang/lib/Optimizer/Transforms - - `11` - - `11` + - `10` + - `10` - `0` - :good:`100%` * - flang/lib/Parser @@ -1816,19 +1941,24 @@ - :good:`100%` * - flang/lib/Semantics - `78` - - `73` - - `5` - - :part:`93%` + - `69` + - `9` + - :part:`88%` * - flang/module - `1` - `1` - `0` - :good:`100%` * - flang/runtime - - `73` + - `74` - `72` + - `2` + - :part:`97%` + * - flang/tools/bbc + - `1` - `1` - - :part:`98%` + - `0` + - :good:`100%` * - flang/tools/f18 - `1` - `1` @@ -1854,6 +1984,11 @@ - `1` - `0` - :good:`100%` + * - flang/unittests/Common + - `1` + - `1` + - `0` + - :good:`100%` * - flang/unittests/Decimal - `2` - `2` @@ -1880,13 +2015,13 @@ - `0` - :good:`100%` * - flang/unittests/Optimizer/Builder/Runtime - - `8` - - `8` + - `10` + - `10` - `0` - :good:`100%` * - flang/unittests/Runtime - - `21` - - `21` + - `22` + - `22` - `0` - :good:`100%` * - libc/AOR_v20.02/math @@ -1960,8 +2095,28 @@ - `1` - :part:`66%` * - libc/include - - `3` - - `3` + - `1` + - `1` + - `0` + - :good:`100%` + * - libc/include/llvm-libc-macros + - `2` + - `2` + - `0` + - :good:`100%` + * - libc/include/llvm-libc-macros/linux + - `1` + - `1` + - `0` + - :good:`100%` + * - libc/include/llvm-libc-types + - `28` + - `28` + - `0` + - :good:`100%` + * - libc/loader/linux/aarch64 + - `1` + - `1` - `0` - :good:`100%` * - libc/loader/linux/x86_64 @@ -1980,8 +2135,18 @@ - `0` - :good:`100%` * - libc/src/errno - - `5` - - `5` + - `4` + - `4` + - `0` + - :good:`100%` + * - libc/src/fcntl + - `3` + - `3` + - `0` + - :good:`100%` + * - libc/src/fcntl/linux + - `3` + - `3` - `0` - :good:`100%` * - libc/src/fenv @@ -1995,8 +2160,8 @@ - `0` - :good:`100%` * - libc/src/math - - `88` - - `88` + - `91` + - `91` - `0` - :good:`100%` * - libc/src/math/aarch64 @@ -2005,13 +2170,13 @@ - `0` - :good:`100%` * - libc/src/math/generic - - `89` - - `89` + - 
`94` + - `94` - `0` - :good:`100%` * - libc/src/math/x86_64 - - `6` - - `6` + - `3` + - `3` - `0` - :good:`100%` * - libc/src/signal @@ -2030,13 +2195,13 @@ - `0` - :good:`100%` * - libc/src/stdlib - - `41` - - `41` + - `46` + - `46` - `0` - :good:`100%` * - libc/src/stdlib/linux - - `1` - - `1` + - `2` + - `2` - `0` - :good:`100%` * - libc/src/string @@ -2046,9 +2211,9 @@ - :good:`100%` * - libc/src/string/memory_utils - `8` - - `8` - - `0` - - :good:`100%` + - `7` + - `1` + - :part:`87%` * - libc/src/sys/mman - `2` - `2` @@ -2056,32 +2221,42 @@ - :good:`100%` * - libc/src/sys/mman/linux - `2` + - `1` + - `1` + - :part:`50%` + * - libc/src/sys/stat + - `2` + - `2` + - `0` + - :good:`100%` + * - libc/src/sys/stat/linux + - `2` - `2` - `0` - :good:`100%` * - libc/src/threads - - `12` - - `12` + - `16` + - `16` - `0` - :good:`100%` * - libc/src/threads/linux - - `16` - - `9` + - `11` - `7` - - :part:`56%` + - `4` + - :part:`63%` * - libc/src/time - `12` - `12` - `0` - :good:`100%` * - libc/src/unistd - - `1` - - `1` + - `7` + - `7` - `0` - :good:`100%` * - libc/src/unistd/linux - - `1` - - `1` + - `7` + - `7` - `0` - :good:`100%` * - libc/src/__support @@ -2090,30 +2265,35 @@ - `0` - :good:`100%` * - libc/src/__support/CPP - - `7` - - `7` - - `0` - - :good:`100%` - * - libc/src/__support/FPUtil - - `16` - - `16` + - `11` + - `10` + - `1` + - :part:`90%` + * - libc/src/__support/File + - `2` + - `2` - `0` - :good:`100%` + * - libc/src/__support/FPUtil + - `15` + - `14` + - `1` + - :part:`93%` * - libc/src/__support/FPUtil/aarch64 - - `2` - - `2` + - `3` + - `3` - `0` - :good:`100%` * - libc/src/__support/FPUtil/generic - - `1` - - `1` + - `3` + - `3` - `0` - :good:`100%` * - libc/src/__support/FPUtil/x86_64 - `6` - - `6` - - `0` - - :good:`100%` + - `5` + - `1` + - :part:`83%` * - libc/src/__support/OSUtil - `3` - `3` @@ -2121,7 +2301,12 @@ - :good:`100%` * - libc/src/__support/OSUtil/linux - `3` - - `3` + - `2` + - `1` + - :part:`66%` + * - 
libc/src/__support/OSUtil/linux/aarch64 + - `1` + - `1` - `0` - :good:`100%` * - libc/src/__support/OSUtil/linux/x86_64 @@ -2129,6 +2314,16 @@ - `1` - `0` - :good:`100%` + * - libc/src/__support/threads + - `1` + - `1` + - `0` + - :good:`100%` + * - libc/src/__support/threads/linux + - `1` + - `1` + - `0` + - :good:`100%` * - libc/utils/HdrGen - `9` - `9` @@ -2160,10 +2355,10 @@ - `0` - :good:`100%` * - libc/utils/UnitTest - - `10` - - `10` - - `0` - - :good:`100%` + - `12` + - `11` + - `1` + - :part:`91%` * - libclc/generic/include - `2` - `1` @@ -2305,20 +2500,20 @@ - `1` - :none:`0%` * - libcxx/benchmarks - - `27` - - `9` + - `28` + - `10` - `18` - - :part:`33%` + - :part:`35%` * - libcxx/include - - `23` + - `22` - `0` - - `23` + - `22` - :none:`0%` * - libcxx/include/__algorithm - - `93` - - `0` - - `93` - - :none:`0%` + - `102` + - `15` + - `87` + - :part:`14%` * - libcxx/include/__bit - `2` - `0` @@ -2329,11 +2524,16 @@ - `0` - `3` - :none:`0%` - * - libcxx/include/__compare - - `10` + * - libcxx/include/__chrono + - `8` - `0` - - `10` + - `8` - :none:`0%` + * - libcxx/include/__compare + - `13` + - `1` + - `12` + - :part:`7%` * - libcxx/include/__concepts - `22` - `0` @@ -2350,24 +2550,29 @@ - `13` - :part:`18%` * - libcxx/include/__format + - `17` + - `2` - `15` - - `0` - - `15` - - :none:`0%` + - :part:`11%` * - libcxx/include/__functional - `27` - `0` - `27` - :none:`0%` + * - libcxx/include/__ios + - `1` + - `0` + - `1` + - :none:`0%` * - libcxx/include/__iterator - - `32` + - `36` - `0` - - `32` + - `36` - :none:`0%` * - libcxx/include/__memory - - `18` + - `19` - `1` - - `17` + - `18` - :part:`5%` * - libcxx/include/__numeric - `13` @@ -2375,15 +2580,15 @@ - `9` - :part:`30%` * - libcxx/include/__random - - `36` - - `0` - - `36` - - :none:`0%` + - `37` + - `2` + - `35` + - :part:`5%` * - libcxx/include/__ranges - - `25` - - `0` - - `25` - - :none:`0%` + - `29` + - `2` + - `27` + - :part:`6%` * - libcxx/include/__support/android - `1` - `0` @@ -2430,25 
+2635,25 @@ - `3` - :none:`0%` * - libcxx/include/__thread - - `1` + - `2` - `0` - - `1` + - `2` - :none:`0%` * - libcxx/include/__utility - - `16` - - `0` - - `16` - - :none:`0%` + - `17` + - `5` + - `12` + - :part:`29%` * - libcxx/include/__variant - `1` - `0` - `1` - :none:`0%` * - libcxx/src - - `41` - - `5` + - `42` + - `6` - `36` - - :part:`12%` + - :part:`14%` * - libcxx/src/experimental - `2` - `1` @@ -2530,10 +2735,10 @@ - `24` - :part:`35%` * - lld/Common - - `10` - - `8` + - `11` + - `9` - `2` - - :part:`80%` + - :part:`81%` * - lld/ELF - `48` - `25` @@ -2545,18 +2750,18 @@ - `10` - :part:`28%` * - lld/include/lld/Common - - `13` + - `14` + - `8` - `6` - - `7` - - :part:`46%` + - :part:`57%` * - lld/include/lld/Core - `20` - `4` - `16` - :part:`20%` * - lld/MachO + - `45` - `43` - - `41` - `2` - :part:`95%` * - lld/MachO/Arch @@ -2631,9 +2836,9 @@ - :part:`36%` * - lldb/include/lldb/Core - `61` - - `32` - - `29` - - :part:`52%` + - `31` + - `30` + - :part:`50%` * - lldb/include/lldb/DataFormatters - `18` - `10` @@ -2710,30 +2915,30 @@ - `21` - :part:`40%` * - lldb/include/lldb/Target - - `77` - - `50` + - `78` + - `51` - `27` - - :part:`64%` + - :part:`65%` * - lldb/include/lldb/Utility - `63` - - `40` - - `23` - - :part:`63%` + - `41` + - `22` + - :part:`65%` * - lldb/include/lldb/Version - `1` - `1` - `0` - :good:`100%` * - lldb/source/API - - `74` - - `6` - - `68` - - :part:`8%` + - `73` + - `36` + - `37` + - :part:`49%` * - lldb/source/Breakpoint - `24` - - `5` - - `19` - - :part:`20%` + - `6` + - `18` + - :part:`25%` * - lldb/source/Commands - `70` - `57` @@ -2741,9 +2946,9 @@ - :part:`81%` * - lldb/source/Core - `49` - - `25` - - `24` - - :part:`51%` + - `26` + - `23` + - :part:`53%` * - lldb/source/DataFormatters - `16` - `3` @@ -2771,9 +2976,9 @@ - :good:`100%` * - lldb/source/Host/linux - `5` - - `4` - - `1` - - :part:`80%` + - `5` + - `0` + - :good:`100%` * - lldb/source/Host/macosx/cfcpp - `14` - `12` @@ -2796,9 +3001,9 @@ - :part:`50%` * - 
lldb/source/Host/posix - `9` - - `5` - - `4` - - :part:`55%` + - `6` + - `3` + - :part:`66%` * - lldb/source/Host/windows - `11` - `7` @@ -3006,9 +3211,9 @@ - :none:`0%` * - lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime - `16` - - `6` - - `10` - - :part:`37%` + - `5` + - `11` + - :part:`31%` * - lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime - `8` - `3` @@ -3146,9 +3351,9 @@ - :part:`80%` * - lldb/source/Plugins/Process/gdb-remote - `26` - - `14` - - `12` - - :part:`53%` + - `15` + - `11` + - :part:`57%` * - lldb/source/Plugins/Process/Linux - `21` - `11` @@ -3176,9 +3381,9 @@ - :part:`50%` * - lldb/source/Plugins/Process/POSIX - `8` - - `5` - - `3` - - :part:`62%` + - `7` + - `1` + - :part:`87%` * - lldb/source/Plugins/Process/scripted - `4` - `4` @@ -3246,9 +3451,9 @@ - :none:`0%` * - lldb/source/Plugins/SymbolFile/DWARF - `65` - - `38` - - `27` - - :part:`58%` + - `39` + - `26` + - :part:`60%` * - lldb/source/Plugins/SymbolFile/NativePDB - `20` - `10` @@ -3325,15 +3530,15 @@ - `13` - :part:`58%` * - lldb/source/Target - - `68` - - `33` + - `69` + - `34` - `35` - - :part:`48%` + - :part:`49%` * - lldb/source/Utility - `58` - - `45` - - `13` - - :part:`77%` + - `46` + - `12` + - :part:`79%` * - lldb/source/Version - `1` - `1` @@ -3411,9 +3616,9 @@ - :part:`40%` * - lldb/tools/lldb-vscode - `27` - - `25` - - `2` - - :part:`92%` + - `24` + - `3` + - :part:`88%` * - lldb/unittests - `1` - `1` @@ -3460,10 +3665,10 @@ - `2` - :part:`60%` * - lldb/unittests/Host - - `15` - - `10` + - `16` + - `11` - `5` - - :part:`66%` + - :part:`68%` * - lldb/unittests/Host/linux - `2` - `2` @@ -3480,10 +3685,10 @@ - `1` - :none:`0%` * - lldb/unittests/Interpreter - - `5` - - `1` + - `6` + - `2` - `4` - - :part:`20%` + - :part:`33%` * - lldb/unittests/Language/CLanguages - `1` - `1` @@ -3520,10 +3725,10 @@ - `1` - :none:`0%` * - lldb/unittests/Platform + - `3` - `2` - `1` - - `1` - - :part:`50%` + - :part:`66%` * - lldb/unittests/Platform/Android - `1` 
- `0` @@ -3610,10 +3815,10 @@ - `0` - :good:`100%` * - lldb/unittests/Target - - `9` - - `5` + - `10` + - `6` - `4` - - :part:`55%` + - :part:`60%` * - lldb/unittests/TestingSupport - `5` - `4` @@ -3640,9 +3845,9 @@ - `2` - :none:`0%` * - lldb/unittests/tools/lldb-server/tests - - `8` + - `7` - `0` - - `8` + - `7` - :none:`0%` * - lldb/unittests/UnwindAssembly/ARM64 - `1` @@ -3660,10 +3865,10 @@ - `1` - :none:`0%` * - lldb/unittests/Utility - - `46` + - `45` - `32` - - `14` - - :part:`69%` + - `13` + - :part:`71%` * - lldb/utils/lit-cpuid - `1` - `0` @@ -3895,15 +4100,15 @@ - `6` - :part:`25%` * - llvm/include/llvm/ADT - - `91` - - `26` - - `65` - - :part:`28%` + - `93` + - `25` + - `68` + - :part:`26%` * - llvm/include/llvm/Analysis - - `129` - - `51` + - `130` + - `52` - `78` - - :part:`39%` + - :part:`40%` * - llvm/include/llvm/Analysis/Utils - `3` - `1` @@ -3915,30 +4120,30 @@ - `3` - :part:`40%` * - llvm/include/llvm/BinaryFormat - - `14` + - `15` - `8` - - `6` - - :part:`57%` + - `7` + - :part:`53%` * - llvm/include/llvm/Bitcode - `7` - - `3` - - `4` - - :part:`42%` + - `2` + - `5` + - :part:`28%` * - llvm/include/llvm/Bitstream - `3` - `0` - `3` - :none:`0%` * - llvm/include/llvm/CodeGen - - `156` - - `50` - - `106` + - `158` + - `51` + - `107` - :part:`32%` * - llvm/include/llvm/CodeGen/GlobalISel - - `29` - - `9` - - `20` - - :part:`31%` + - `27` + - `8` + - `19` + - :part:`29%` * - llvm/include/llvm/CodeGen/MIRParser - `2` - `1` @@ -3990,13 +4195,13 @@ - `19` - :part:`64%` * - llvm/include/llvm/DebugInfo/Symbolize + - `5` - `3` - `2` - - `1` - - :part:`66%` + - :part:`60%` * - llvm/include/llvm/Debuginfod - - `2` - - `2` + - `3` + - `3` - `0` - :good:`100%` * - llvm/include/llvm/Demangle @@ -4021,19 +4226,19 @@ - :part:`16%` * - llvm/include/llvm/ExecutionEngine/JITLink - `16` - - `13` - - `3` - - :part:`81%` + - `14` + - `2` + - :part:`87%` * - llvm/include/llvm/ExecutionEngine/Orc - `38` - - `28` - - `10` - - :part:`73%` + - `29` + - `9` + - :part:`76%` 
* - llvm/include/llvm/ExecutionEngine/Orc/Shared - - `7` + - `8` - `4` - - `3` - - :part:`57%` + - `4` + - :part:`50%` * - llvm/include/llvm/ExecutionEngine/Orc/TargetProcess - `7` - `7` @@ -4060,9 +4265,9 @@ - `0` - :good:`100%` * - llvm/include/llvm/IR - - `92` + - `93` - `28` - - `64` + - `65` - :part:`30%` * - llvm/include/llvm/IRReader - `1` @@ -4091,9 +4296,9 @@ - :none:`0%` * - llvm/include/llvm/MC - `74` - - `23` - - `51` - - :part:`31%` + - `24` + - `50` + - :part:`32%` * - llvm/include/llvm/MC/MCDisassembler - `4` - `1` @@ -4119,6 +4324,36 @@ - `8` - `0` - :good:`100%` + * - llvm/include/llvm/ObjCopy + - `4` + - `3` + - `1` + - :part:`75%` + * - llvm/include/llvm/ObjCopy/COFF + - `2` + - `2` + - `0` + - :good:`100%` + * - llvm/include/llvm/ObjCopy/ELF + - `2` + - `2` + - `0` + - :good:`100%` + * - llvm/include/llvm/ObjCopy/MachO + - `2` + - `2` + - `0` + - :good:`100%` + * - llvm/include/llvm/ObjCopy/wasm + - `2` + - `2` + - `0` + - :good:`100%` + * - llvm/include/llvm/ObjCopy/XCOFF + - `2` + - `2` + - `0` + - :good:`100%` * - llvm/include/llvm/Object - `31` - `12` @@ -4140,10 +4375,10 @@ - `2` - :part:`50%` * - llvm/include/llvm/ProfileData - - `10` - - `5` + - `11` - `5` - - :part:`50%` + - `6` + - :part:`45%` * - llvm/include/llvm/ProfileData/Coverage - `3` - `2` @@ -4155,9 +4390,9 @@ - `1` - :part:`91%` * - llvm/include/llvm/Support - - `182` - - `67` - - `115` + - `186` + - `68` + - `118` - :part:`36%` * - llvm/include/llvm/Support/FileSystem - `1` @@ -4175,10 +4410,10 @@ - `1` - :none:`0%` * - llvm/include/llvm/TableGen - - `8` - - `2` + - `9` + - `3` - `6` - - :part:`25%` + - :part:`33%` * - llvm/include/llvm/Target - `6` - `2` @@ -4231,24 +4466,29 @@ - :part:`58%` * - llvm/include/llvm/Transforms/IPO - `38` - - `27` - - `11` - - :part:`71%` + - `28` + - `10` + - :part:`73%` * - llvm/include/llvm/Transforms/Scalar - `75` - `47` - `28` - :part:`62%` * - llvm/include/llvm/Transforms/Utils - - `73` - - `43` + - `74` + - `44` - `30` - - :part:`58%` + - 
:part:`59%` * - llvm/include/llvm/Transforms/Vectorize - `5` - `1` - `4` - :part:`20%` + * - llvm/include/llvm/WindowsDriver + - `2` + - `1` + - `1` + - :part:`50%` * - llvm/include/llvm/WindowsManifest - `1` - `1` @@ -4285,10 +4525,10 @@ - `2` - :part:`33%` * - llvm/lib/BinaryFormat - - `12` - - `9` + - `13` + - `10` - `3` - - :part:`75%` + - :part:`76%` * - llvm/lib/Bitcode/Reader - `7` - `2` @@ -4305,25 +4545,25 @@ - `1` - :none:`0%` * - llvm/lib/CodeGen - - `215` - - `57` - - `158` - - :part:`26%` + - `220` + - `60` + - `160` + - :part:`27%` * - llvm/lib/CodeGen/AsmPrinter - `45` - `18` - `27` - :part:`40%` * - llvm/lib/CodeGen/GlobalISel - - `26` + - `24` - `9` - - `17` - - :part:`34%` + - `15` + - :part:`37%` * - llvm/lib/CodeGen/LiveDebugValues - `5` - - `2` - - `3` - - :part:`40%` + - `1` + - `4` + - :part:`20%` * - llvm/lib/CodeGen/MIRParser - `4` - `1` @@ -4356,9 +4596,9 @@ - :part:`75%` * - llvm/lib/DebugInfo/PDB - `40` - - `34` - - `6` - - :part:`85%` + - `35` + - `5` + - :part:`87%` * - llvm/lib/DebugInfo/PDB/DIA - `18` - `15` @@ -4371,12 +4611,12 @@ - :part:`74%` * - llvm/lib/DebugInfo/Symbolize - `4` - - `2` - - `2` - - :part:`50%` + - `3` + - `1` + - :part:`75%` * - llvm/lib/Debuginfod - - `2` - - `2` + - `3` + - `3` - `0` - :good:`100%` * - llvm/lib/Demangle @@ -4411,9 +4651,9 @@ - :none:`0%` * - llvm/lib/ExecutionEngine/JITLink - `23` - - `17` - - `6` - - :part:`73%` + - `15` + - `8` + - :part:`65%` * - llvm/lib/ExecutionEngine/MCJIT - `2` - `0` @@ -4430,8 +4670,8 @@ - `15` - :part:`59%` * - llvm/lib/ExecutionEngine/Orc/Shared - - `3` - - `3` + - `4` + - `4` - `0` - :good:`100%` * - llvm/lib/ExecutionEngine/Orc/TargetProcess @@ -4539,11 +4779,41 @@ - `7` - `1` - :part:`87%` + * - llvm/lib/ObjCopy + - `4` + - `3` + - `1` + - :part:`75%` + * - llvm/lib/ObjCopy/COFF + - `7` + - `7` + - `0` + - :good:`100%` + * - llvm/lib/ObjCopy/ELF + - `3` + - `3` + - `0` + - :good:`100%` + * - llvm/lib/ObjCopy/MachO + - `9` + - `9` + - `0` + - :good:`100%` + * - 
llvm/lib/ObjCopy/wasm + - `7` + - `7` + - `0` + - :good:`100%` + * - llvm/lib/ObjCopy/XCOFF + - `6` + - `3` + - `3` + - :part:`50%` * - llvm/lib/Object - `31` - - `15` - `16` - - :part:`48%` + - `15` + - :part:`51%` * - llvm/lib/ObjectYAML - `23` - `9` @@ -4560,10 +4830,10 @@ - `3` - :part:`50%` * - llvm/lib/ProfileData - - `10` + - `11` - `4` - - `6` - - :part:`40%` + - `7` + - :part:`36%` * - llvm/lib/ProfileData/Coverage - `3` - `0` @@ -4575,30 +4845,30 @@ - `3` - :part:`76%` * - llvm/lib/Support - - `141` - - `58` + - `144` + - `61` - `83` - - :part:`41%` + - :part:`42%` * - llvm/lib/Support/Unix - `1` - `0` - `1` - :none:`0%` * - llvm/lib/TableGen - - `13` - - `1` + - `15` + - `3` - `12` - - :part:`7%` + - :part:`20%` * - llvm/lib/Target - `5` - - `0` - - `5` - - :none:`0%` + - `1` + - `4` + - :part:`20%` * - llvm/lib/Target/AArch64 - `60` - - `6` - - `54` - - :part:`10%` + - `7` + - `53` + - :part:`11%` * - llvm/lib/Target/AArch64/AsmParser - `1` - `0` @@ -4631,9 +4901,9 @@ - :none:`0%` * - llvm/lib/Target/AMDGPU - `169` - - `39` - - `130` - - :part:`23%` + - `38` + - `131` + - :part:`22%` * - llvm/lib/Target/AMDGPU/AsmParser - `1` - `0` @@ -4651,9 +4921,9 @@ - :good:`100%` * - llvm/lib/Target/AMDGPU/MCTargetDesc - `21` - - `6` - - `15` - - :part:`28%` + - `5` + - `16` + - :part:`23%` * - llvm/lib/Target/AMDGPU/TargetInfo - `2` - `1` @@ -4716,9 +4986,9 @@ - :none:`0%` * - llvm/lib/Target/AVR - `24` - - `24` - - `0` - - :good:`100%` + - `23` + - `1` + - :part:`95%` * - llvm/lib/Target/AVR/AsmParser - `1` - `1` @@ -4731,10 +5001,10 @@ - :good:`100%` * - llvm/lib/Target/AVR/MCTargetDesc - `20` - - `20` - - `0` - - :good:`100%` - * - llvm/lib/Target/AVR/TargetInfo + - `18` + - `2` + - :part:`90%` + * - llvm/lib/Target/AVR/TargetInfo - `2` - `2` - `0` @@ -4765,8 +5035,8 @@ - `1` - :part:`50%` * - llvm/lib/Target/CSKY - - `20` - - `20` + - `23` + - `23` - `0` - :good:`100%` * - llvm/lib/Target/CSKY/AsmParser @@ -4774,11 +5044,16 @@ - `1` - `0` - :good:`100%` - * - 
llvm/lib/Target/CSKY/MCTargetDesc - - `15` - - `15` + * - llvm/lib/Target/CSKY/Disassembler + - `1` + - `1` - `0` - :good:`100%` + * - llvm/lib/Target/CSKY/MCTargetDesc + - `15` + - `14` + - `1` + - :part:`93%` * - llvm/lib/Target/CSKY/TargetInfo - `2` - `2` @@ -4834,6 +5109,21 @@ - `2` - `0` - :good:`100%` + * - llvm/lib/Target/LoongArch + - `19` + - `19` + - `0` + - :good:`100%` + * - llvm/lib/Target/LoongArch/MCTargetDesc + - `12` + - `12` + - `0` + - :good:`100%` + * - llvm/lib/Target/LoongArch/TargetInfo + - `2` + - `2` + - `0` + - :good:`100%` * - llvm/lib/Target/M68k - `26` - `25` @@ -4856,9 +5146,9 @@ - :part:`85%` * - llvm/lib/Target/M68k/MCTargetDesc - `12` - - `12` - - `0` - - :good:`100%` + - `11` + - `1` + - :part:`91%` * - llvm/lib/Target/M68k/TargetInfo - `2` - `2` @@ -4930,10 +5220,10 @@ - `0` - :good:`100%` * - llvm/lib/Target/PowerPC - - `53` - - `4` + - `54` + - `5` - `49` - - :part:`7%` + - :part:`9%` * - llvm/lib/Target/PowerPC/AsmParser - `1` - `0` @@ -4960,10 +5250,10 @@ - `0` - :good:`100%` * - llvm/lib/Target/RISCV - - `34` - - `18` - - `16` - - :part:`52%` + - `36` + - `17` + - `19` + - :part:`47%` * - llvm/lib/Target/RISCV/AsmParser - `1` - `0` @@ -5035,10 +5325,10 @@ - `0` - :good:`100%` * - llvm/lib/Target/VE - - `21` - - `17` - - `4` - - :part:`80%` + - `24` + - `19` + - `5` + - :part:`79%` * - llvm/lib/Target/VE/AsmParser - `1` - `1` @@ -5091,9 +5381,9 @@ - :good:`100%` * - llvm/lib/Target/X86 - `82` - - `18` - - `64` - - :part:`21%` + - `19` + - `63` + - :part:`23%` * - llvm/lib/Target/X86/AsmParser - `3` - `0` @@ -5104,6 +5394,11 @@ - `0` - `2` - :none:`0%` + * - llvm/lib/Target/X86/MCA + - `2` + - `2` + - `0` + - :good:`100%` * - llvm/lib/Target/X86/MCTargetDesc - `25` - `5` @@ -5141,9 +5436,9 @@ - :good:`100%` * - llvm/lib/TextAPI - `11` - - `11` - - `0` - - :good:`100%` + - `9` + - `2` + - :part:`81%` * - llvm/lib/ToolDrivers/llvm-dlltool - `1` - `0` @@ -5180,15 +5475,15 @@ - `15` - :part:`6%` * - 
llvm/lib/Transforms/Instrumentation - - `22` - - `8` + - `21` + - `7` - `14` - - :part:`36%` + - :part:`33%` * - llvm/lib/Transforms/IPO - `44` - - `10` - - `34` - - :part:`22%` + - `9` + - `35` + - :part:`20%` * - llvm/lib/Transforms/ObjCARC - `15` - `4` @@ -5200,15 +5495,20 @@ - `63` - :part:`20%` * - llvm/lib/Transforms/Utils - - `77` - - `18` + - `78` + - `19` - `59` - - :part:`23%` + - :part:`24%` * - llvm/lib/Transforms/Vectorize - `22` - `13` - `9` - :part:`59%` + * - llvm/lib/WindowsDriver + - `1` + - `1` + - `0` + - :good:`100%` * - llvm/lib/WindowsManifest - `1` - `1` @@ -5311,9 +5611,9 @@ - :none:`0%` * - llvm/tools/llvm-cxxdump - `4` - - `2` - - `2` - - :part:`50%` + - `1` + - `3` + - :part:`25%` * - llvm/tools/llvm-cxxfilt - `1` - `1` @@ -5344,6 +5644,11 @@ - `0` - `1` - :none:`0%` + * - llvm/tools/llvm-dis-fuzzer + - `1` + - `1` + - `0` + - :good:`100%` * - llvm/tools/llvm-dlang-demangle-fuzzer - `2` - `2` @@ -5510,30 +5815,10 @@ - `1` - :none:`0%` * - llvm/tools/llvm-objcopy - - `6` - - `5` - - `1` - - :part:`83%` - * - llvm/tools/llvm-objcopy/COFF - - `9` - - `9` - - `0` - - :good:`100%` - * - llvm/tools/llvm-objcopy/ELF - - `5` - - `2` - `3` - - :part:`40%` - * - llvm/tools/llvm-objcopy/MachO - - `11` - - `11` - - `0` - - :good:`100%` - * - llvm/tools/llvm-objcopy/wasm - - `9` - - `9` - - `0` - - :good:`100%` + - `2` + - `1` + - :part:`66%` * - llvm/tools/llvm-objdump - `15` - `10` @@ -5561,9 +5846,9 @@ - :none:`0%` * - llvm/tools/llvm-profgen - `11` - - `7` - - `4` - - :part:`63%` + - `6` + - `5` + - :part:`54%` * - llvm/tools/llvm-rc - `12` - `6` @@ -5576,14 +5861,19 @@ - :part:`15%` * - llvm/tools/llvm-reduce - `7` - - `7` - - `0` - - :good:`100%` + - `6` + - `1` + - :part:`85%` * - llvm/tools/llvm-reduce/deltas - `40` - `39` - `1` - :part:`97%` + * - llvm/tools/llvm-remark-size-diff + - `1` + - `1` + - `0` + - :good:`100%` * - llvm/tools/llvm-rtdyld - `1` - `0` @@ -5616,9 +5906,9 @@ - :good:`100%` * - llvm/tools/llvm-split - `1` - - `1` - `0` - 
- :good:`100%` + - `1` + - :none:`0%` * - llvm/tools/llvm-stress - `1` - `0` @@ -5715,10 +6005,10 @@ - `0` - :good:`100%` * - llvm/unittests/ADT - - `78` - - `32` - - `46` - - :part:`41%` + - `77` + - `29` + - `48` + - :part:`37%` * - llvm/unittests/Analysis - `38` - `13` @@ -5745,10 +6035,10 @@ - `1` - :part:`50%` * - llvm/unittests/CodeGen - - `19` - - `9` + - `20` - `10` - - :part:`47%` + - `10` + - :part:`50%` * - llvm/unittests/CodeGen/GlobalISel - `13` - `2` @@ -5761,9 +6051,9 @@ - :part:`50%` * - llvm/unittests/DebugInfo/DWARF - `17` - - `12` - - `5` - - :part:`70%` + - `13` + - `4` + - :part:`76%` * - llvm/unittests/DebugInfo/GSYM - `1` - `0` @@ -5821,9 +6111,9 @@ - :none:`0%` * - llvm/unittests/Frontend - `4` - - `4` - - `0` - - :good:`100%` + - `3` + - `1` + - :part:`75%` * - llvm/unittests/FuzzMutate - `4` - `0` @@ -5836,9 +6126,9 @@ - :good:`100%` * - llvm/unittests/IR - `36` - - `5` - - `31` - - :part:`13%` + - `6` + - `30` + - :part:`16%` * - llvm/unittests/LineEditor - `1` - `0` @@ -5874,6 +6164,11 @@ - `0` - `1` - :none:`0%` + * - llvm/unittests/ObjCopy + - `1` + - `1` + - `0` + - :good:`100%` * - llvm/unittests/Object - `9` - `6` @@ -5895,20 +6190,20 @@ - `0` - :good:`100%` * - llvm/unittests/ProfileData - - `4` - - `1` + - `5` + - `2` - `3` - - :part:`25%` + - :part:`40%` * - llvm/unittests/Remarks - `8` - `5` - `3` - :part:`62%` * - llvm/unittests/Support - - `98` - - `33` + - `100` + - `35` - `65` - - :part:`33%` + - :part:`35%` * - llvm/unittests/Support/CommandLineInit - `1` - `1` @@ -5920,10 +6215,10 @@ - `4` - :none:`0%` * - llvm/unittests/TableGen + - `3` + - `1` - `2` - - `0` - - `2` - - :none:`0%` + - :part:`33%` * - llvm/unittests/Target/AArch64 - `3` - `1` @@ -5935,10 +6230,10 @@ - `0` - :good:`100%` * - llvm/unittests/Target/ARM + - `2` - `1` - - `0` - `1` - - :none:`0%` + - :part:`50%` * - llvm/unittests/Target/PowerPC - `1` - `1` @@ -5954,6 +6249,11 @@ - `0` - `1` - :none:`0%` + * - llvm/unittests/Testing/Support + - `1` + - `1` + - 
`0` + - :good:`100%` * - llvm/unittests/TextAPI - `5` - `3` @@ -6055,15 +6355,15 @@ - `1` - :none:`0%` * - llvm/utils/TableGen - - `76` - - `11` + - `78` + - `13` - `65` - - :part:`14%` + - :part:`16%` * - llvm/utils/TableGen/GlobalISel - `17` - - `8` - - `9` - - :part:`47%` + - `10` + - `7` + - :part:`58%` * - llvm/utils/unittest/googlemock/include/gmock - `12` - `0` @@ -6285,18 +6585,18 @@ - `0` - :good:`100%` * - mlir/include/mlir/Analysis - - `14` - - `12` + - `7` + - `5` - `2` - - :part:`85%` + - :part:`71%` * - mlir/include/mlir/Analysis/AliasAnalysis - `1` - `1` - `0` - :good:`100%` * - mlir/include/mlir/Analysis/Presburger - - `6` - - `6` + - `9` + - `9` - `0` - :good:`100%` * - mlir/include/mlir/Bindings/Python @@ -6354,6 +6654,21 @@ - `1` - `0` - :good:`100%` + * - mlir/include/mlir/Conversion/ControlFlowToLLVM + - `1` + - `1` + - `0` + - :good:`100%` + * - mlir/include/mlir/Conversion/ControlFlowToSPIRV + - `2` + - `2` + - `0` + - :good:`100%` + * - mlir/include/mlir/Conversion/FuncToSPIRV + - `2` + - `2` + - `0` + - :good:`100%` * - mlir/include/mlir/Conversion/GPUCommon - `1` - `1` @@ -6361,9 +6676,9 @@ - :good:`100%` * - mlir/include/mlir/Conversion/GPUToNVVM - `1` - - `0` - `1` - - :none:`0%` + - `0` + - :good:`100%` * - mlir/include/mlir/Conversion/GPUToROCDL - `2` - `2` @@ -6449,6 +6764,11 @@ - `1` - `0` - :good:`100%` + * - mlir/include/mlir/Conversion/SCFToControlFlow + - `1` + - `1` + - `0` + - :good:`100%` * - mlir/include/mlir/Conversion/SCFToGPU - `2` - `2` @@ -6464,11 +6784,6 @@ - `2` - `0` - :good:`100%` - * - mlir/include/mlir/Conversion/SCFToStandard - - `1` - - `1` - - `0` - - :good:`100%` * - mlir/include/mlir/Conversion/ShapeToStandard - `1` - `1` @@ -6484,7 +6799,7 @@ - `2` - `0` - :good:`100%` - * - mlir/include/mlir/Conversion/StandardToSPIRV + * - mlir/include/mlir/Conversion/TensorToSPIRV - `2` - `2` - `0` @@ -6535,8 +6850,13 @@ - `0` - :good:`100%` * - mlir/include/mlir/Dialect/Affine - - `2` - - `2` + - `4` + - `4` + - `0` + - 
:good:`100%` + * - mlir/include/mlir/Dialect/Affine/Analysis + - `5` + - `5` - `0` - :good:`100%` * - mlir/include/mlir/Dialect/Affine/IR @@ -6555,6 +6875,11 @@ - `0` - :good:`100%` * - mlir/include/mlir/Dialect/Arithmetic/Transforms + - `2` + - `2` + - `0` + - :good:`100%` + * - mlir/include/mlir/Dialect/Arithmetic/Utils - `1` - `1` - `0` @@ -6580,13 +6905,13 @@ - `0` - :good:`100%` * - mlir/include/mlir/Dialect/Bufferization/IR - - `2` - - `2` + - `3` + - `3` - `0` - :good:`100%` * - mlir/include/mlir/Dialect/Bufferization/Transforms - - `2` - - `2` + - `4` + - `4` - `0` - :good:`100%` * - mlir/include/mlir/Dialect/Complex/IR @@ -6594,6 +6919,11 @@ - `1` - `0` - :good:`100%` + * - mlir/include/mlir/Dialect/ControlFlow/IR + - `2` + - `2` + - `0` + - :good:`100%` * - mlir/include/mlir/Dialect/DLTI - `2` - `2` @@ -6604,6 +6934,16 @@ - `1` - `0` - :good:`100%` + * - mlir/include/mlir/Dialect/Func/IR + - `1` + - `1` + - `0` + - :good:`100%` + * - mlir/include/mlir/Dialect/Func/Transforms + - `3` + - `3` + - `0` + - :good:`100%` * - mlir/include/mlir/Dialect/GPU - `5` - `5` @@ -6620,18 +6960,18 @@ - `0` - :good:`100%` * - mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize - - `10` - - `9` - - `1` - - :part:`90%` + - `2` + - `2` + - `0` + - :good:`100%` * - mlir/include/mlir/Dialect/Linalg/IR - `2` - `2` - `0` - :good:`100%` * - mlir/include/mlir/Dialect/Linalg/Transforms - - `4` - - `4` + - `5` + - `5` - `0` - :good:`100%` * - mlir/include/mlir/Dialect/Linalg/Utils @@ -6665,8 +7005,8 @@ - `0` - :good:`100%` * - mlir/include/mlir/Dialect/MemRef/Transforms - - `1` - - `1` + - `2` + - `2` - `0` - :good:`100%` * - mlir/include/mlir/Dialect/MemRef/Utils @@ -6700,10 +7040,15 @@ - `0` - :good:`100%` * - mlir/include/mlir/Dialect/SCF - - `5` - `4` - - `1` - - :part:`80%` + - `4` + - `0` + - :good:`100%` + * - mlir/include/mlir/Dialect/SCF/Utils + - `2` + - `2` + - `0` + - :good:`100%` * - mlir/include/mlir/Dialect/Shape/IR - `1` - `1` @@ -6719,6 +7064,11 @@ - `1` - `0` - 
:good:`100%` + * - mlir/include/mlir/Dialect/SparseTensor/Pipelines + - `1` + - `1` + - `0` + - :good:`100%` * - mlir/include/mlir/Dialect/SparseTensor/Transforms - `1` - `1` @@ -6749,27 +7099,17 @@ - `1` - `0` - :good:`100%` - * - mlir/include/mlir/Dialect/StandardOps/IR - - `1` - - `1` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/StandardOps/Transforms - - `4` - - `4` - - `0` - - :good:`100%` - * - mlir/include/mlir/Dialect/StandardOps/Utils - - `1` - - `1` - - `0` - - :good:`100%` * - mlir/include/mlir/Dialect/Tensor/IR - - `2` - - `2` + - `3` + - `3` - `0` - :good:`100%` * - mlir/include/mlir/Dialect/Tensor/Transforms + - `3` + - `3` + - `0` + - :good:`100%` + * - mlir/include/mlir/Dialect/Tensor/Utils - `1` - `1` - `0` @@ -6785,20 +7125,30 @@ - `0` - :good:`100%` * - mlir/include/mlir/Dialect/Tosa/Utils - - `2` - - `2` + - `3` + - `3` - `0` - :good:`100%` * - mlir/include/mlir/Dialect/Utils - - `3` - - `3` + - `4` + - `4` - `0` - :good:`100%` - * - mlir/include/mlir/Dialect/Vector + * - mlir/include/mlir/Dialect/Vector/IR + - `1` + - `1` + - `0` + - :good:`100%` + * - mlir/include/mlir/Dialect/Vector/Transforms - `4` - `4` - `0` - :good:`100%` + * - mlir/include/mlir/Dialect/Vector/Utils + - `1` + - `1` + - `0` + - :good:`100%` * - mlir/include/mlir/Dialect/X86Vector - `2` - `2` @@ -6806,19 +7156,19 @@ - :good:`100%` * - mlir/include/mlir/ExecutionEngine - `8` - - `6` - - `2` - - :part:`75%` + - `7` + - `1` + - :part:`87%` * - mlir/include/mlir/Interfaces - `14` - `13` - `1` - :part:`92%` * - mlir/include/mlir/IR - - `50` + - `49` - `29` - - `21` - - :part:`57%` + - `20` + - :part:`59%` * - mlir/include/mlir/Parser - `1` - `1` @@ -6929,16 +7279,26 @@ - `2` - `2` - :part:`50%` + * - mlir/include/mlir/Tools/PDLL/CodeGen + - `2` + - `2` + - `0` + - :good:`100%` + * - mlir/include/mlir/Tools/PDLL/ODS + - `4` + - `4` + - `0` + - :good:`100%` * - mlir/include/mlir/Tools/PDLL/Parser - `1` - `1` - `0` - :good:`100%` * - mlir/include/mlir/Transforms - - 
`12` - - `10` + - `9` + - `7` - `2` - - :part:`83%` + - :part:`77%` * - mlir/include/mlir-c - `15` - `15` @@ -6950,13 +7310,13 @@ - `0` - :good:`100%` * - mlir/include/mlir-c/Dialect - - `9` - - `9` + - `11` + - `11` - `0` - :good:`100%` * - mlir/lib/Analysis - - `14` - - `14` + - `7` + - `7` - `0` - :good:`100%` * - mlir/lib/Analysis/AliasAnalysis @@ -6965,15 +7325,15 @@ - `0` - :good:`100%` * - mlir/lib/Analysis/Presburger - - `5` - - `5` + - `8` + - `8` - `0` - :good:`100%` * - mlir/lib/Bindings/Python - - `22` - - `21` - - `1` - - :part:`95%` + - `23` + - `23` + - `0` + - :good:`100%` * - mlir/lib/Bindings/Python/Conversions - `1` - `1` @@ -6995,8 +7355,8 @@ - `0` - :good:`100%` * - mlir/lib/CAPI/Dialect - - `13` - - `13` + - `15` + - `15` - `0` - :good:`100%` * - mlir/lib/CAPI/ExecutionEngine @@ -7069,11 +7429,26 @@ - `1` - `0` - :good:`100%` - * - mlir/lib/Conversion/GPUCommon - - `5` - - `5` + * - mlir/lib/Conversion/ControlFlowToLLVM + - `1` + - `1` + - `0` + - :good:`100%` + * - mlir/lib/Conversion/ControlFlowToSPIRV + - `2` + - `2` + - `0` + - :good:`100%` + * - mlir/lib/Conversion/FuncToSPIRV + - `2` + - `2` - `0` - :good:`100%` + * - mlir/lib/Conversion/GPUCommon + - `5` + - `4` + - `1` + - :part:`80%` * - mlir/lib/Conversion/GPUToNVVM - `2` - `2` @@ -7106,9 +7481,9 @@ - :part:`50%` * - mlir/lib/Conversion/LinalgToStandard - `1` - - `1` - `0` - - :good:`100%` + - `1` + - :none:`0%` * - mlir/lib/Conversion/LLVMCommon - `8` - `8` @@ -7164,6 +7539,11 @@ - `1` - `0` - :good:`100%` + * - mlir/lib/Conversion/SCFToControlFlow + - `1` + - `1` + - `0` + - :good:`100%` * - mlir/lib/Conversion/SCFToGPU - `2` - `2` @@ -7179,11 +7559,6 @@ - `2` - `0` - :good:`100%` - * - mlir/lib/Conversion/SCFToStandard - - `1` - - `1` - - `0` - - :good:`100%` * - mlir/lib/Conversion/ShapeToStandard - `2` - `2` @@ -7204,7 +7579,7 @@ - `1` - `0` - :good:`100%` - * - mlir/lib/Conversion/StandardToSPIRV + * - mlir/lib/Conversion/TensorToSPIRV - `2` - `2` - `0` @@ -7226,9 +7601,9 @@ - 
:good:`100%` * - mlir/lib/Conversion/VectorToGPU - `1` - - `1` - `0` - - :good:`100%` + - `1` + - :none:`0%` * - mlir/lib/Conversion/VectorToLLVM - `2` - `2` @@ -7254,19 +7629,24 @@ - `1` - `0` - :good:`100%` + * - mlir/lib/Dialect/Affine/Analysis + - `5` + - `5` + - `0` + - :good:`100%` * - mlir/lib/Dialect/Affine/IR - `3` - `2` - `1` - :part:`66%` * - mlir/lib/Dialect/Affine/Transforms - - `11` - - `11` + - `14` + - `14` - `0` - :good:`100%` * - mlir/lib/Dialect/Affine/Utils - - `1` - - `1` + - `3` + - `3` - `0` - :good:`100%` * - mlir/lib/Dialect/AMX/IR @@ -7281,12 +7661,17 @@ - :good:`100%` * - mlir/lib/Dialect/Arithmetic/IR - `2` - - `2` - - `0` - - :good:`100%` + - `1` + - `1` + - :part:`50%` * - mlir/lib/Dialect/Arithmetic/Transforms + - `4` - `3` - - `3` + - `1` + - :part:`75%` + * - mlir/lib/Dialect/Arithmetic/Utils + - `1` + - `1` - `0` - :good:`100%` * - mlir/lib/Dialect/ArmNeon/IR @@ -7315,13 +7700,13 @@ - `0` - :good:`100%` * - mlir/lib/Dialect/Bufferization/IR - - `3` - - `3` + - `4` + - `4` - `0` - :good:`100%` * - mlir/lib/Dialect/Bufferization/Transforms - - `3` - - `3` + - `7` + - `7` - `0` - :good:`100%` * - mlir/lib/Dialect/Complex/IR @@ -7329,6 +7714,11 @@ - `2` - `0` - :good:`100%` + * - mlir/lib/Dialect/ControlFlow/IR + - `1` + - `1` + - `0` + - :good:`100%` * - mlir/lib/Dialect/DLTI - `2` - `2` @@ -7339,6 +7729,16 @@ - `1` - `0` - :good:`100%` + * - mlir/lib/Dialect/Func/IR + - `1` + - `1` + - `0` + - :good:`100%` + * - mlir/lib/Dialect/Func/Transforms + - `4` + - `4` + - `0` + - :good:`100%` * - mlir/lib/Dialect/GPU/IR - `1` - `1` @@ -7355,18 +7755,18 @@ - `0` - :good:`100%` * - mlir/lib/Dialect/Linalg/ComprehensiveBufferize - - `10` - - `9` - - `1` - - :part:`90%` + - `2` + - `2` + - `0` + - :good:`100%` * - mlir/lib/Dialect/Linalg/IR - `3` - `3` - `0` - :good:`100%` * - mlir/lib/Dialect/Linalg/Transforms - - `23` - - `23` + - `25` + - `25` - `0` - :good:`100%` * - mlir/lib/Dialect/Linalg/Utils @@ -7376,9 +7776,9 @@ - :good:`100%` * - 
mlir/lib/Dialect/LLVMIR/IR - `7` - - `7` - - `0` - - :good:`100%` + - `5` + - `2` + - :part:`71%` * - mlir/lib/Dialect/LLVMIR/Transforms - `2` - `2` @@ -7400,10 +7800,10 @@ - `0` - :good:`100%` * - mlir/lib/Dialect/MemRef/Transforms - - `2` - - `2` - - `0` - - :good:`100%` + - `7` + - `6` + - `1` + - :part:`85%` * - mlir/lib/Dialect/MemRef/Utils - `1` - `1` @@ -7454,6 +7854,11 @@ - `11` - `1` - :part:`91%` + * - mlir/lib/Dialect/SCF/Utils + - `2` + - `2` + - `0` + - :good:`100%` * - mlir/lib/Dialect/Shape/IR - `1` - `1` @@ -7469,11 +7874,16 @@ - `1` - `0` - :good:`100%` - * - mlir/lib/Dialect/SparseTensor/Transforms - - `3` - - `3` + * - mlir/lib/Dialect/SparseTensor/Pipelines + - `1` + - `1` - `0` - :good:`100%` + * - mlir/lib/Dialect/SparseTensor/Transforms + - `5` + - `4` + - `1` + - :part:`80%` * - mlir/lib/Dialect/SparseTensor/Utils - `1` - `1` @@ -7490,65 +7900,65 @@ - `0` - :good:`100%` * - mlir/lib/Dialect/SPIRV/Transforms + - `7` - `6` - - `5` - `1` - - :part:`83%` + - :part:`85%` * - mlir/lib/Dialect/SPIRV/Utils - `1` - `1` - `0` - :good:`100%` - * - mlir/lib/Dialect/StandardOps/IR - - `1` - - `1` + * - mlir/lib/Dialect/Tensor/IR + - `4` + - `4` - `0` - :good:`100%` - * - mlir/lib/Dialect/StandardOps/Transforms - - `8` - - `8` + * - mlir/lib/Dialect/Tensor/Transforms + - `4` + - `4` - `0` - :good:`100%` - * - mlir/lib/Dialect/StandardOps/Utils + * - mlir/lib/Dialect/Tensor/Utils - `1` - `1` - `0` - :good:`100%` - * - mlir/lib/Dialect/Tensor/IR - - `3` - - `3` - - `0` - - :good:`100%` - * - mlir/lib/Dialect/Tensor/Transforms - - `2` - - `2` - - `0` - - :good:`100%` * - mlir/lib/Dialect/Tosa/IR - `1` - `1` - `0` - :good:`100%` * - mlir/lib/Dialect/Tosa/Transforms + - `6` + - `6` + - `0` + - :good:`100%` + * - mlir/lib/Dialect/Tosa/Utils + - `2` + - `2` + - `0` + - :good:`100%` + * - mlir/lib/Dialect/Utils - `4` - `4` - `0` - :good:`100%` - * - mlir/lib/Dialect/Tosa/Utils + * - mlir/lib/Dialect/Vector/IR - `1` + - `0` - `1` + - :none:`0%` + * - 
mlir/lib/Dialect/Vector/Transforms + - `11` + - `11` - `0` - :good:`100%` - * - mlir/lib/Dialect/Utils - - `3` - - `3` + * - mlir/lib/Dialect/Vector/Utils + - `1` + - `1` - `0` - :good:`100%` - * - mlir/lib/Dialect/Vector - - `9` - - `8` - - `1` - - :part:`88%` * - mlir/lib/Dialect/X86Vector/IR - `1` - `1` @@ -7561,24 +7971,24 @@ - :good:`100%` * - mlir/lib/ExecutionEngine - `9` - - `8` - - `1` - - :part:`88%` + - `9` + - `0` + - :good:`100%` * - mlir/lib/Interfaces - `12` - `12` - `0` - :good:`100%` * - mlir/lib/IR - - `37` - - `34` - - `3` - - :part:`91%` + - `38` + - `31` + - `7` + - :part:`81%` * - mlir/lib/Parser - `14` - - `14` - - `0` - - :good:`100%` + - `10` + - `4` + - :part:`71%` * - mlir/lib/Pass - `8` - `6` @@ -7611,9 +8021,9 @@ - :good:`100%` * - mlir/lib/Target/LLVMIR - `7` - - `7` - - `0` - - :good:`100%` + - `6` + - `1` + - :part:`85%` * - mlir/lib/Target/LLVMIR/Dialect/AMX - `1` - `1` @@ -7676,9 +8086,9 @@ - :part:`75%` * - mlir/lib/Tools/mlir-lsp-server - `5` - - `5` - - `0` - - :good:`100%` + - `4` + - `1` + - :part:`80%` * - mlir/lib/Tools/mlir-lsp-server/lsp - `6` - `4` @@ -7694,19 +8104,29 @@ - `5` - `1` - :part:`83%` + * - mlir/lib/Tools/PDLL/CodeGen + - `2` + - `1` + - `1` + - :part:`50%` + * - mlir/lib/Tools/PDLL/ODS + - `3` + - `3` + - `0` + - :good:`100%` * - mlir/lib/Tools/PDLL/Parser - `3` - `1` - `2` - :part:`33%` * - mlir/lib/Transforms - - `19` - - `16` - - `3` + - `13` + - `11` + - `2` - :part:`84%` * - mlir/lib/Transforms/Utils - - `8` - - `8` + - `6` + - `6` - `0` - :good:`100%` * - mlir/lib/Translation @@ -7721,9 +8141,9 @@ - :good:`100%` * - mlir/tools/mlir-linalg-ods-gen - `1` - - `0` - `1` - - :none:`0%` + - `0` + - :good:`100%` * - mlir/tools/mlir-lsp-server - `1` - `1` @@ -7769,14 +8189,9 @@ - `4` - `0` - :good:`100%` - * - mlir/unittests/Analysis - - `5` - - `5` - - `0` - - :good:`100%` * - mlir/unittests/Analysis/Presburger - - `4` - - `4` + - `8` + - `8` - `0` - :good:`100%` * - mlir/unittests/Conversion/PDLToPDLInterp 
@@ -7789,12 +8204,12 @@ - `1` - `0` - :good:`100%` - * - mlir/unittests/Dialect/Quant - - `1` - - `1` + * - mlir/unittests/Dialect/Affine/Analysis + - `3` + - `3` - `0` - :good:`100%` - * - mlir/unittests/Dialect/SCF + * - mlir/unittests/Dialect/Quant - `1` - `1` - `0` @@ -7830,8 +8245,8 @@ - `0` - :good:`100%` * - mlir/unittests/Pass - - `2` - - `2` + - `3` + - `3` - `0` - :good:`100%` * - mlir/unittests/Rewrite @@ -7866,49 +8281,14 @@ - :good:`100%` * - openmp/libomptarget/DeviceRTL/src - `12` - - `10` - - `2` - - :part:`83%` - * - openmp/libomptarget/deviceRTLs - - `2` - - `2` - - `0` - - :good:`100%` - * - openmp/libomptarget/deviceRTLs/amdgcn/src - - `2` - - `2` - - `0` - - :good:`100%` - * - openmp/libomptarget/deviceRTLs/common - - `7` - - `6` - - `1` - - :part:`85%` - * - openmp/libomptarget/deviceRTLs/common/include - - `1` - - `1` - - `0` - - :good:`100%` - * - openmp/libomptarget/deviceRTLs/common/include/target - - `1` - - `1` - - `0` - - :good:`100%` - * - openmp/libomptarget/deviceRTLs/common/src - - `1` - - `1` - - `0` - - :good:`100%` - * - openmp/libomptarget/deviceRTLs/nvptx/src - - `2` - - `2` - - `0` - - :good:`100%` + - `9` + - `3` + - :part:`75%` * - openmp/libomptarget/include + - `9` - `8` - - `8` - - `0` - - :good:`100%` + - `1` + - :part:`88%` * - openmp/libomptarget/plugins/amdgpu/dynamic_hsa - `3` - `2` @@ -7941,9 +8321,9 @@ - :good:`100%` * - openmp/libomptarget/plugins/cuda/src - `1` - - `1` - `0` - - :good:`100%` + - `1` + - :none:`0%` * - openmp/libomptarget/plugins/generic-elf-64bit/src - `1` - `1` @@ -7956,9 +8336,9 @@ - :good:`100%` * - openmp/libomptarget/plugins/remote/lib - `1` - - `1` - `0` - - :good:`100%` + - `1` + - :none:`0%` * - openmp/libomptarget/plugins/remote/server - `3` - `3` @@ -7975,10 +8355,10 @@ - `0` - :good:`100%` * - openmp/libomptarget/src + - `7` - `6` - - `4` - - `2` - - :part:`66%` + - `1` + - :part:`85%` * - openmp/libomptarget/tools/deviceinfo - `1` - `1` @@ -7991,9 +8371,9 @@ - :good:`100%` * - 
openmp/runtime/src - `75` - - `66` - - `9` - - :part:`88%` + - `65` + - `10` + - :part:`86%` * - openmp/runtime/src/thirdparty/ittnotify - `6` - `5` @@ -8151,9 +8531,9 @@ - :good:`100%` * - pstl/include/pstl/internal - `23` - - `12` - - `11` - - :part:`52%` + - `16` + - `7` + - :part:`69%` * - pstl/include/pstl/internal/omp - `11` - `8` @@ -8185,7 +8565,7 @@ - `1` - :part:`50%` * - Total - - :total:`15902` - - :total:`8407` - - :total:`7495` - - :total:`52%` + - :total:`16432` + - :total:`8857` + - :total:`7575` + - :total:`53%` diff --git a/clang/docs/DataFlowAnalysisIntro.md b/clang/docs/DataFlowAnalysisIntro.md --- a/clang/docs/DataFlowAnalysisIntro.md +++ b/clang/docs/DataFlowAnalysisIntro.md @@ -287,7 +287,7 @@ (Note that there are other ways to write this equation that produce higher precision analysis results. The trick is to keep exploring the execution paths -separately and delay joining until later. Hoowever, we won't discuss those +separately and delay joining until later. However, we won't discuss those variations here.) 
To make a conclusion about all paths through the program, we repeat this diff --git a/clang/docs/analyzer/checkers.rst b/clang/docs/analyzer/checkers.rst --- a/clang/docs/analyzer/checkers.rst +++ b/clang/docs/analyzer/checkers.rst @@ -2358,7 +2358,7 @@ ``_IO_getc``, ``fdopen``, ``fopen``, ``freopen``, ``get_current_dir_name``, ``getch``, ``getchar``, ``getchar_unlocked``, ``getwd``, ``getcwd``, ``getgroups``, ``gethostname``, ``getlogin``, ``getlogin_r``, ``getnameinfo``, ``gets``, ``gets_s``, ``getseuserbyname``, ``readlink``, ``readlinkat``, ``scanf``, ``scanf_s``, ``socket``, ``wgetch`` Default propagations defined by ``GenericTaintChecker``: -``atoi``, ``atol``, ``atoll``, ``fgetc``, ``fgetln``, ``fgets``, ``fscanf``, ``sscanf``, ``getc``, ``getc_unlocked``, ``getdelim``, ``getline``, ``getw``, ``pread``, ``read``, ``strchr``, ``strrchr``, ``tolower``, ``toupper`` +``atoi``, ``atol``, ``atoll``, ``basename``, ``dirname``, ``fgetc``, ``fgetln``, ``fgets``, ``fnmatch``, ``fread``, ``fscanf``, ``fscanf_s``, ``index``, ``inflate``, ``isalnum``, ``isalpha``, ``isascii``, ``isblank``, ``iscntrl``, ``isdigit``, ``isgraph``, ``islower``, ``isprint``, ``ispunct``, ``isspace``, ``isupper``, ``isxdigit``, ``memchr``, ``memrchr``, ``sscanf``, ``getc``, ``getc_unlocked``, ``getdelim``, ``getline``, ``getw``, ``memcmp``, ``memcpy``, ``memmem``, ``memmove``, ``mbtowc``, ``pread``, ``qsort``, ``qsort_r``, ``rawmemchr``, ``read``, ``recv``, ``recvfrom``, ``rindex``, ``strcasestr``, ``strchr``, ``strchrnul``, ``strcasecmp``, ``strcmp``, ``strcspn``, ``strlen``, ``strncasecmp``, ``strncmp``, ``strndup``, ``strndupa``, ``strnlen``, ``strpbrk``, ``strrchr``, ``strsep``, ``strspn``, ``strstr``, ``strtol``, ``strtoll``, ``strtoul``, ``strtoull``, ``tolower``, ``toupper``, ``ttyname``, ``ttyname_r``, ``wctomb``, ``wcwidth`` Default sinks defined in ``GenericTaintChecker``: ``printf``, ``setproctitle``, ``system``, ``popen``, ``execl``, ``execle``, ``execlp``, ``execv``, ``execvp``, 
``execvP``, ``execve``, ``dlopen``, ``memcpy``, ``memmove``, ``strncpy``, ``strndup``, ``malloc``, ``calloc``, ``alloca``, ``memccpy``, ``realloc``, ``bcopy`` diff --git a/clang/docs/tools/clang-formatted-files.txt b/clang/docs/tools/clang-formatted-files.txt --- a/clang/docs/tools/clang-formatted-files.txt +++ b/clang/docs/tools/clang-formatted-files.txt @@ -1,3 +1,115 @@ +bolt/include/bolt/Core/BinaryData.h +bolt/include/bolt/Core/BinaryEmitter.h +bolt/include/bolt/Core/BinaryLoop.h +bolt/include/bolt/Core/BinarySection.h +bolt/include/bolt/Core/DebugData.h +bolt/include/bolt/Core/Exceptions.h +bolt/include/bolt/Core/JumpTable.h +bolt/include/bolt/Core/MCPlus.h +bolt/include/bolt/Core/MCPlusBuilder.h +bolt/include/bolt/Core/ParallelUtilities.h +bolt/include/bolt/Passes/ADRRelaxationPass.h +bolt/include/bolt/Passes/Aligner.h +bolt/include/bolt/Passes/AllocCombiner.h +bolt/include/bolt/Passes/AsmDump.h +bolt/include/bolt/Passes/BinaryFunctionCallGraph.h +bolt/include/bolt/Passes/BinaryPasses.h +bolt/include/bolt/Passes/CacheMetrics.h +bolt/include/bolt/Passes/CallGraph.h +bolt/include/bolt/Passes/CallGraphWalker.h +bolt/include/bolt/Passes/DataflowAnalysis.h +bolt/include/bolt/Passes/DataflowInfoManager.h +bolt/include/bolt/Passes/DominatorAnalysis.h +bolt/include/bolt/Passes/FrameAnalysis.h +bolt/include/bolt/Passes/FrameOptimizer.h +bolt/include/bolt/Passes/HFSort.h +bolt/include/bolt/Passes/IdenticalCodeFolding.h +bolt/include/bolt/Passes/IndirectCallPromotion.h +bolt/include/bolt/Passes/Inliner.h +bolt/include/bolt/Passes/Instrumentation.h +bolt/include/bolt/Passes/InstrumentationSummary.h +bolt/include/bolt/Passes/JTFootprintReduction.h +bolt/include/bolt/Passes/LivenessAnalysis.h +bolt/include/bolt/Passes/LongJmp.h +bolt/include/bolt/Passes/LoopInversionPass.h +bolt/include/bolt/Passes/MCF.h +bolt/include/bolt/Passes/PatchEntries.h +bolt/include/bolt/Passes/PLTCall.h +bolt/include/bolt/Passes/ReachingDefOrUse.h +bolt/include/bolt/Passes/ReachingInsns.h 
+bolt/include/bolt/Passes/RegAnalysis.h +bolt/include/bolt/Passes/RegReAssign.h +bolt/include/bolt/Passes/ReorderAlgorithm.h +bolt/include/bolt/Passes/ReorderData.h +bolt/include/bolt/Passes/ReorderFunctions.h +bolt/include/bolt/Passes/ReorderUtils.h +bolt/include/bolt/Passes/RetpolineInsertion.h +bolt/include/bolt/Passes/ShrinkWrapping.h +bolt/include/bolt/Passes/SplitFunctions.h +bolt/include/bolt/Passes/StackAllocationAnalysis.h +bolt/include/bolt/Passes/StackAvailableExpressions.h +bolt/include/bolt/Passes/StackPointerTracking.h +bolt/include/bolt/Passes/StackReachingUses.h +bolt/include/bolt/Passes/StokeInfo.h +bolt/include/bolt/Passes/TailDuplication.h +bolt/include/bolt/Passes/ThreeWayBranch.h +bolt/include/bolt/Passes/ValidateInternalCalls.h +bolt/include/bolt/Passes/VeneerElimination.h +bolt/include/bolt/Profile/BoltAddressTranslation.h +bolt/include/bolt/Profile/DataAggregator.h +bolt/include/bolt/Profile/DataReader.h +bolt/include/bolt/Profile/Heatmap.h +bolt/include/bolt/Profile/ProfileReaderBase.h +bolt/include/bolt/Profile/ProfileYAMLMapping.h +bolt/include/bolt/Profile/YAMLProfileReader.h +bolt/include/bolt/Profile/YAMLProfileWriter.h +bolt/include/bolt/Rewrite/BinaryPassManager.h +bolt/include/bolt/Rewrite/DWARFRewriter.h +bolt/include/bolt/Rewrite/ExecutableFileMemoryManager.h +bolt/include/bolt/Rewrite/MachORewriteInstance.h +bolt/include/bolt/RuntimeLibs/HugifyRuntimeLibrary.h +bolt/include/bolt/RuntimeLibs/InstrumentationRuntimeLibrary.h +bolt/include/bolt/RuntimeLibs/RuntimeLibrary.h +bolt/include/bolt/Utils/CommandLineOpts.h +bolt/include/bolt/Utils/NameResolver.h +bolt/include/bolt/Utils/NameShortener.h +bolt/include/bolt/Utils/Utils.h +bolt/lib/Core/BinaryBasicBlock.cpp +bolt/lib/Core/BinarySection.cpp +bolt/lib/Core/DebugData.cpp +bolt/lib/Core/JumpTable.cpp +bolt/lib/Core/MCPlusBuilder.cpp +bolt/lib/Passes/ADRRelaxationPass.cpp +bolt/lib/Passes/AllocCombiner.cpp +bolt/lib/Passes/AsmDump.cpp +bolt/lib/Passes/BinaryFunctionCallGraph.cpp 
+bolt/lib/Passes/CacheMetrics.cpp +bolt/lib/Passes/CallGraphWalker.cpp +bolt/lib/Passes/DataflowAnalysis.cpp +bolt/lib/Passes/DataflowInfoManager.cpp +bolt/lib/Passes/HFSort.cpp +bolt/lib/Passes/IndirectCallPromotion.cpp +bolt/lib/Passes/Instrumentation.cpp +bolt/lib/Passes/JTFootprintReduction.cpp +bolt/lib/Passes/LivenessAnalysis.cpp +bolt/lib/Passes/LoopInversionPass.cpp +bolt/lib/Passes/PettisAndHansen.cpp +bolt/lib/Passes/StackAllocationAnalysis.cpp +bolt/lib/Passes/StackPointerTracking.cpp +bolt/lib/Passes/StackReachingUses.cpp +bolt/lib/Passes/TailDuplication.cpp +bolt/lib/Passes/ThreeWayBranch.cpp +bolt/lib/Passes/ValidateInternalCalls.cpp +bolt/lib/Profile/BoltAddressTranslation.cpp +bolt/lib/Profile/Heatmap.cpp +bolt/lib/Profile/ProfileReaderBase.cpp +bolt/lib/RuntimeLibs/HugifyRuntimeLibrary.cpp +bolt/lib/RuntimeLibs/InstrumentationRuntimeLibrary.cpp +bolt/lib/RuntimeLibs/RuntimeLibrary.cpp +bolt/lib/Utils/Utils.cpp +bolt/tools/heatmap/heatmap.cpp +bolt/tools/llvm-bolt-fuzzer/llvm-bolt-fuzzer.cpp +bolt/unittests/Core/MCPlusBuilder.cpp clang/bindings/python/tests/cindex/INPUTS/header1.h clang/bindings/python/tests/cindex/INPUTS/header2.h clang/bindings/python/tests/cindex/INPUTS/header3.h @@ -13,10 +125,19 @@ clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h clang/include/clang/Analysis/FlowSensitive/ControlFlowContext.h clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h +clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h clang/include/clang/Analysis/FlowSensitive/DataflowLattice.h clang/include/clang/Analysis/FlowSensitive/DataflowWorklist.h +clang/include/clang/Analysis/FlowSensitive/MapLattice.h +clang/include/clang/Analysis/FlowSensitive/MatchSwitch.h +clang/include/clang/Analysis/FlowSensitive/Solver.h +clang/include/clang/Analysis/FlowSensitive/SourceLocationsLattice.h +clang/include/clang/Analysis/FlowSensitive/StorageLocation.h 
+clang/include/clang/Analysis/FlowSensitive/Transfer.h clang/include/clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h +clang/include/clang/Analysis/FlowSensitive/Value.h +clang/include/clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h clang/include/clang/APINotes/APINotesYAMLCompiler.h clang/include/clang/APINotes/Types.h clang/include/clang/AST/AST.h @@ -138,10 +259,12 @@ clang/include/clang/Tooling/ASTDiff/ASTDiffInternal.h clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h clang/include/clang/Tooling/DependencyScanning/DependencyScanningService.h +clang/include/clang/Tooling/DependencyScanning/DependencyScanningTool.h clang/include/clang/Tooling/DependencyScanning/DependencyScanningWorker.h clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h clang/include/clang/Tooling/Inclusions/HeaderIncludes.h clang/include/clang/Tooling/Inclusions/IncludeStyle.h +clang/include/clang/Tooling/Inclusions/StandardLibrary.h clang/include/clang/Tooling/Refactoring/ASTSelection.h clang/include/clang/Tooling/Refactoring/AtomicChange.h clang/include/clang/Tooling/Refactoring/Lookup.h @@ -166,6 +289,11 @@ clang/include/clang/Tooling/Syntax/Nodes.h clang/include/clang/Tooling/Syntax/Tokens.h clang/include/clang/Tooling/Syntax/Tree.h +clang/include/clang/Tooling/Syntax/Pseudo/Grammar.h +clang/include/clang/Tooling/Syntax/Pseudo/LRGraph.h +clang/include/clang/Tooling/Syntax/Pseudo/LRTable.h +clang/include/clang/Tooling/Syntax/Pseudo/Preprocess.h +clang/include/clang/Tooling/Syntax/Pseudo/Token.h clang/include/clang/Tooling/Transformer/MatchConsumer.h clang/include/clang/Tooling/Transformer/Parsing.h clang/include/clang/Tooling/Transformer/RangeSelector.h @@ -178,9 +306,13 @@ clang/lib/Analysis/CalledOnceCheck.cpp clang/lib/Analysis/CloneDetection.cpp clang/lib/Analysis/CodeInjector.cpp -clang/lib/Analysis/ExprMutationAnalyzer.cpp clang/lib/Analysis/FlowSensitive/ControlFlowContext.cpp 
+clang/lib/Analysis/FlowSensitive/DataflowAnalysisContext.cpp +clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +clang/lib/Analysis/FlowSensitive/SourceLocationsLattice.cpp +clang/lib/Analysis/FlowSensitive/Transfer.cpp clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp +clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp clang/lib/Analysis/plugins/CheckerDependencyHandling/CheckerDependencyHandling.cpp clang/lib/Analysis/plugins/SampleAnalyzer/MainCallChecker.cpp clang/lib/APINotes/APINotesFormat.h @@ -246,7 +378,6 @@ clang/lib/Basic/Targets/WebAssembly.cpp clang/lib/Basic/Targets/WebAssembly.h clang/lib/Basic/Targets/XCore.cpp -clang/lib/CodeGen/CGCall.h clang/lib/CodeGen/CGCUDARuntime.cpp clang/lib/CodeGen/CGLoopInfo.cpp clang/lib/CodeGen/CGLoopInfo.h @@ -262,6 +393,7 @@ clang/lib/Driver/XRayArgs.cpp clang/lib/Driver/ToolChains/AIX.cpp clang/lib/Driver/ToolChains/AIX.h +clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp clang/lib/Driver/ToolChains/AMDGPUOpenMP.h clang/lib/Driver/ToolChains/Ananas.h clang/lib/Driver/ToolChains/AVR.cpp @@ -313,6 +445,8 @@ clang/lib/Format/BreakableToken.h clang/lib/Format/ContinuationIndenter.cpp clang/lib/Format/ContinuationIndenter.h +clang/lib/Format/DefinitionBlockSeparator.cpp +clang/lib/Format/DefinitionBlockSeparator.h clang/lib/Format/Encoding.h clang/lib/Format/Format.cpp clang/lib/Format/FormatInternal.h @@ -340,6 +474,7 @@ clang/lib/Format/UsingDeclarationsSorter.h clang/lib/Format/WhitespaceManager.cpp clang/lib/Format/WhitespaceManager.h +clang/lib/Frontend/ExtractAPIConsumer.cpp clang/lib/Frontend/FrontendOptions.cpp clang/lib/Frontend/InterfaceStubFunctionsConsumer.cpp clang/lib/Frontend/SerializedDiagnosticReader.cpp @@ -352,18 +487,15 @@ clang/lib/Headers/nmmintrin.h clang/lib/Headers/s390intrin.h clang/lib/Headers/stdalign.h -clang/lib/Headers/stdnoreturn.h clang/lib/Headers/wmmintrin.h clang/lib/Headers/xtestintrin.h clang/lib/Headers/__clang_cuda_texture_intrinsics.h 
-clang/lib/Headers/__clang_hip_cmath.h clang/lib/Headers/__clang_hip_libdevice_declares.h clang/lib/Headers/__stddef_max_align_t.h clang/lib/Headers/openmp_wrappers/complex.h clang/lib/Headers/openmp_wrappers/complex_cmath.h clang/lib/Headers/openmp_wrappers/math.h clang/lib/Headers/openmp_wrappers/time.h -clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h clang/lib/Headers/ppc_wrappers/mmintrin.h clang/lib/Headers/ppc_wrappers/smmintrin.h clang/lib/Index/FileIndexRecord.cpp @@ -378,13 +510,15 @@ clang/lib/Parse/ParseOpenMP.cpp clang/lib/Sema/CodeCompleteConsumer.cpp clang/lib/Sema/CoroutineStmtBuilder.h -clang/lib/Sema/SemaOpenMP.cpp clang/lib/Sema/SemaSYCL.cpp clang/lib/Sema/UsedDeclVisitor.h clang/lib/Serialization/InMemoryModuleCache.cpp clang/lib/Serialization/ModuleFileExtension.cpp clang/lib/StaticAnalyzer/Checkers/AllocationState.h clang/lib/StaticAnalyzer/Checkers/CheckPlacementNew.cpp +clang/lib/StaticAnalyzer/Checkers/ErrnoModeling.cpp +clang/lib/StaticAnalyzer/Checkers/ErrnoModeling.h +clang/lib/StaticAnalyzer/Checkers/ErrnoTesterChecker.cpp clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp clang/lib/StaticAnalyzer/Checkers/FuchsiaHandleChecker.cpp clang/lib/StaticAnalyzer/Checkers/InterCheckerAPI.h @@ -433,12 +567,14 @@ clang/lib/Tooling/StandaloneExecution.cpp clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp clang/lib/Tooling/DependencyScanning/DependencyScanningService.cpp +clang/lib/Tooling/DependencyScanning/DependencyScanningTool.cpp clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp clang/lib/Tooling/DumpTool/APIData.h clang/lib/Tooling/DumpTool/ASTSrcLocProcessor.h clang/lib/Tooling/DumpTool/ClangSrcLocDump.cpp clang/lib/Tooling/Inclusions/HeaderIncludes.cpp clang/lib/Tooling/Inclusions/IncludeStyle.cpp +clang/lib/Tooling/Inclusions/StandardLibrary.cpp clang/lib/Tooling/Refactoring/ASTSelection.cpp clang/lib/Tooling/Refactoring/Lookup.cpp 
clang/lib/Tooling/Refactoring/RefactoringActions.cpp @@ -451,6 +587,14 @@ clang/lib/Tooling/Syntax/Nodes.cpp clang/lib/Tooling/Syntax/Synthesis.cpp clang/lib/Tooling/Syntax/Tree.cpp +clang/lib/Tooling/Syntax/Pseudo/Grammar.cpp +clang/lib/Tooling/Syntax/Pseudo/GrammarBNF.cpp +clang/lib/Tooling/Syntax/Pseudo/Lex.cpp +clang/lib/Tooling/Syntax/Pseudo/LRGraph.cpp +clang/lib/Tooling/Syntax/Pseudo/LRTable.cpp +clang/lib/Tooling/Syntax/Pseudo/LRTableBuild.cpp +clang/lib/Tooling/Syntax/Pseudo/Preprocess.cpp +clang/lib/Tooling/Syntax/Pseudo/Token.cpp clang/lib/Tooling/Transformer/Parsing.cpp clang/lib/Tooling/Transformer/SourceCodeBuilders.cpp clang/lib/Tooling/Transformer/Stencil.cpp @@ -463,8 +607,11 @@ clang/tools/clang-fuzzer/ExampleClangLLVMProtoFuzzer.cpp clang/tools/clang-fuzzer/ExampleClangLoopProtoFuzzer.cpp clang/tools/clang-fuzzer/handle-llvm/handle_llvm.h +clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp +clang/tools/clang-linker-wrapper/OffloadWrapper.cpp clang/tools/clang-nvlink-wrapper/ClangNvlinkWrapper.cpp clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp +clang/tools/clang-pseudo/ClangPseudo.cpp clang/tools/clang-refactor/ClangRefactor.cpp clang/tools/clang-refactor/TestSupport.cpp clang/tools/clang-refactor/TestSupport.h @@ -482,7 +629,15 @@ clang/tools/scan-build-py/tests/functional/src/include/clean-one.h clang/unittests/Analysis/CFGBuildResult.h clang/unittests/Analysis/MacroExpansionContextTest.cpp +clang/unittests/Analysis/FlowSensitive/DataflowAnalysisContextTest.cpp +clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp +clang/unittests/Analysis/FlowSensitive/MapLatticeTest.cpp +clang/unittests/Analysis/FlowSensitive/MatchSwitchTest.cpp +clang/unittests/Analysis/FlowSensitive/MultiVarConstantPropagationTest.cpp +clang/unittests/Analysis/FlowSensitive/NoopAnalysis.h clang/unittests/Analysis/FlowSensitive/SingleVarConstantPropagationTest.cpp +clang/unittests/Analysis/FlowSensitive/SolverTest.cpp 
+clang/unittests/Analysis/FlowSensitive/SourceLocationsLatticeTest.cpp clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp clang/unittests/Analysis/FlowSensitive/TestingSupport.h clang/unittests/Analysis/FlowSensitive/TestingSupportTest.cpp @@ -506,6 +661,7 @@ clang/unittests/CrossTU/CrossTranslationUnitTest.cpp clang/unittests/Driver/SanitizerArgsTest.cpp clang/unittests/Format/CleanupTest.cpp +clang/unittests/Format/DefinitionBlockSeparatorTest.cpp clang/unittests/Format/FormatTest.cpp clang/unittests/Format/FormatTestComments.cpp clang/unittests/Format/FormatTestCSharp.cpp @@ -541,6 +697,7 @@ clang/unittests/Lex/HeaderMapTest.cpp clang/unittests/Lex/HeaderMapTestUtils.h clang/unittests/Lex/HeaderSearchTest.cpp +clang/unittests/Lex/PPMemoryAllocationsTest.cpp clang/unittests/libclang/CrashTests/LibclangCrashTest.cpp clang/unittests/Rewrite/RewriterTest.cpp clang/unittests/Sema/CodeCompleteTest.cpp @@ -554,6 +711,7 @@ clang/unittests/StaticAnalyzer/StoreTest.cpp clang/unittests/StaticAnalyzer/SValTest.cpp clang/unittests/StaticAnalyzer/SymbolReaperTest.cpp +clang/unittests/Tooling/CastExprTest.cpp clang/unittests/Tooling/DependencyScannerTest.cpp clang/unittests/Tooling/ExecutionTest.cpp clang/unittests/Tooling/LookupTest.cpp @@ -561,6 +719,7 @@ clang/unittests/Tooling/RefactoringActionRulesTest.cpp clang/unittests/Tooling/ReplacementTest.h clang/unittests/Tooling/SourceCodeBuildersTest.cpp +clang/unittests/Tooling/StandardLibraryTest.cpp clang/unittests/Tooling/StencilTest.cpp clang/unittests/Tooling/RecursiveASTVisitorTests/CallbacksCallExpr.cpp clang/unittests/Tooling/RecursiveASTVisitorTests/CallbacksLeaf.cpp @@ -577,6 +736,10 @@ clang/unittests/Tooling/Syntax/TokensTest.cpp clang/unittests/Tooling/Syntax/TreeTestBase.cpp clang/unittests/Tooling/Syntax/TreeTestBase.h +clang/unittests/Tooling/Syntax/Pseudo/GrammarTest.cpp +clang/unittests/Tooling/Syntax/Pseudo/LRTableTest.cpp +clang/unittests/Tooling/Syntax/Pseudo/PreprocessTest.cpp 
+clang/unittests/Tooling/Syntax/Pseudo/TokenTest.cpp clang/utils/TableGen/ClangDataCollectorsEmitter.cpp clang/utils/TableGen/ClangSyntaxEmitter.cpp clang/utils/TableGen/TableGenBackends.h @@ -641,6 +804,8 @@ clang-tools-extra/clang-tidy/ClangTidyProfiling.h clang-tools-extra/clang-tidy/GlobList.cpp clang-tools-extra/clang-tidy/GlobList.h +clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp +clang-tools-extra/clang-tidy/NoLintDirectiveHandler.h clang-tools-extra/clang-tidy/abseil/AbseilMatcher.h clang-tools-extra/clang-tidy/abseil/CleanupCtadCheck.cpp clang-tools-extra/clang-tidy/abseil/CleanupCtadCheck.h @@ -709,6 +874,7 @@ clang-tools-extra/clang-tidy/boost/UseToStringCheck.h clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.h +clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.cpp clang-tools-extra/clang-tidy/bugprone/AssertSideEffectCheck.h clang-tools-extra/clang-tidy/bugprone/BadSignalToKillThreadCheck.cpp clang-tools-extra/clang-tidy/bugprone/BadSignalToKillThreadCheck.h @@ -760,6 +926,8 @@ clang-tools-extra/clang-tidy/bugprone/RedundantBranchConditionCheck.h clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.cpp clang-tools-extra/clang-tidy/bugprone/ReservedIdentifierCheck.h +clang-tools-extra/clang-tidy/bugprone/SharedPtrArrayMismatchCheck.cpp +clang-tools-extra/clang-tidy/bugprone/SharedPtrArrayMismatchCheck.h clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.cpp clang-tools-extra/clang-tidy/bugprone/SignalHandlerCheck.h clang-tools-extra/clang-tidy/bugprone/SignedCharMisuseCheck.cpp @@ -767,6 +935,8 @@ clang-tools-extra/clang-tidy/bugprone/SizeofContainerCheck.cpp clang-tools-extra/clang-tidy/bugprone/SizeofContainerCheck.h clang-tools-extra/clang-tidy/bugprone/SizeofExpressionCheck.h +clang-tools-extra/clang-tidy/bugprone/SmartPtrArrayMismatchCheck.cpp +clang-tools-extra/clang-tidy/bugprone/SmartPtrArrayMismatchCheck.h 
clang-tools-extra/clang-tidy/bugprone/SpuriouslyWakeUpFunctionsCheck.cpp clang-tools-extra/clang-tidy/bugprone/StringConstructorCheck.cpp clang-tools-extra/clang-tidy/bugprone/StringConstructorCheck.h @@ -849,6 +1019,7 @@ clang-tools-extra/clang-tidy/cppcoreguidelines/InitVariablesCheck.h clang-tools-extra/clang-tidy/cppcoreguidelines/InterfacesGlobalInitCheck.cpp clang-tools-extra/clang-tidy/cppcoreguidelines/InterfacesGlobalInitCheck.h +clang-tools-extra/clang-tidy/cppcoreguidelines/MacroUsageCheck.cpp clang-tools-extra/clang-tidy/cppcoreguidelines/MacroUsageCheck.h clang-tools-extra/clang-tidy/cppcoreguidelines/NarrowingConversionsCheck.cpp clang-tools-extra/clang-tidy/cppcoreguidelines/NarrowingConversionsCheck.h @@ -944,6 +1115,8 @@ clang-tools-extra/clang-tidy/misc/DefinitionsInHeadersCheck.cpp clang-tools-extra/clang-tidy/misc/DefinitionsInHeadersCheck.h clang-tools-extra/clang-tidy/misc/MiscTidyModule.cpp +clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.cpp +clang-tools-extra/clang-tidy/misc/MisleadingBidirectional.h clang-tools-extra/clang-tidy/misc/MisleadingIdentifier.cpp clang-tools-extra/clang-tidy/misc/MisleadingIdentifier.h clang-tools-extra/clang-tidy/misc/MisplacedConstCheck.cpp @@ -1013,7 +1186,6 @@ clang-tools-extra/clang-tidy/modernize/UseNullptrCheck.h clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.cpp clang-tools-extra/clang-tidy/modernize/UseOverrideCheck.h -clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.cpp clang-tools-extra/clang-tidy/modernize/UseTrailingReturnTypeCheck.h clang-tools-extra/clang-tidy/modernize/UseTransparentFunctorsCheck.cpp clang-tools-extra/clang-tidy/modernize/UseUsingCheck.cpp @@ -1072,6 +1244,8 @@ clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.cpp clang-tools-extra/clang-tidy/readability/BracesAroundStatementsCheck.h clang-tools-extra/clang-tidy/readability/ConstReturnTypeCheck.cpp +clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.cpp 
+clang-tools-extra/clang-tidy/readability/ContainerContainsCheck.h clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.cpp clang-tools-extra/clang-tidy/readability/ContainerDataPointerCheck.h clang-tools-extra/clang-tidy/readability/ContainerSizeEmptyCheck.h @@ -1079,6 +1253,8 @@ clang-tools-extra/clang-tidy/readability/ConvertMemberFunctionsToStatic.h clang-tools-extra/clang-tidy/readability/DeleteNullPointerCheck.cpp clang-tools-extra/clang-tidy/readability/DeleteNullPointerCheck.h +clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.cpp +clang-tools-extra/clang-tidy/readability/DuplicateIncludeCheck.h clang-tools-extra/clang-tidy/readability/ElseAfterReturnCheck.h clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.cpp clang-tools-extra/clang-tidy/readability/FunctionCognitiveComplexityCheck.h @@ -1122,7 +1298,9 @@ clang-tools-extra/clang-tidy/readability/RedundantStringCStrCheck.h clang-tools-extra/clang-tidy/readability/RedundantStringInitCheck.cpp clang-tools-extra/clang-tidy/readability/RedundantStringInitCheck.h +clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.cpp clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprCheck.h +clang-tools-extra/clang-tidy/readability/SimplifyBooleanExprMatchers.h clang-tools-extra/clang-tidy/readability/SimplifySubscriptExprCheck.cpp clang-tools-extra/clang-tidy/readability/StaticAccessedThroughInstanceCheck.cpp clang-tools-extra/clang-tidy/readability/StaticAccessedThroughInstanceCheck.h @@ -1230,7 +1408,6 @@ clang-tools-extra/clangd/IncludeCleaner.cpp clang-tools-extra/clangd/IncludeCleaner.h clang-tools-extra/clangd/IncludeFixer.cpp -clang-tools-extra/clangd/InlayHints.cpp clang-tools-extra/clangd/InlayHints.h clang-tools-extra/clangd/LSPBinder.h clang-tools-extra/clangd/ParsedAST.cpp @@ -1243,7 +1420,6 @@ clang-tools-extra/clangd/Quality.cpp clang-tools-extra/clangd/RIFF.cpp clang-tools-extra/clangd/RIFF.h -clang-tools-extra/clangd/Selection.cpp 
clang-tools-extra/clangd/Selection.h clang-tools-extra/clangd/SemanticHighlighting.h clang-tools-extra/clangd/SemanticSelection.cpp @@ -1290,14 +1466,12 @@ clang-tools-extra/clangd/index/Symbol.cpp clang-tools-extra/clangd/index/Symbol.h clang-tools-extra/clangd/index/SymbolCollector.cpp -clang-tools-extra/clangd/index/SymbolCollector.h clang-tools-extra/clangd/index/SymbolID.cpp clang-tools-extra/clangd/index/SymbolLocation.cpp clang-tools-extra/clangd/index/SymbolLocation.h clang-tools-extra/clangd/index/SymbolOrigin.cpp clang-tools-extra/clangd/index/SymbolOrigin.h clang-tools-extra/clangd/index/YAMLSerialization.cpp -clang-tools-extra/clangd/index/dex/Dex.h clang-tools-extra/clangd/index/dex/Iterator.cpp clang-tools-extra/clangd/index/dex/Iterator.h clang-tools-extra/clangd/index/dex/PostingList.cpp @@ -1314,6 +1488,8 @@ clang-tools-extra/clangd/index/remote/server/Server.cpp clang-tools-extra/clangd/index/remote/unimplemented/UnimplementedClient.cpp clang-tools-extra/clangd/indexer/IndexerMain.cpp +clang-tools-extra/clangd/refactor/InsertionPoint.cpp +clang-tools-extra/clangd/refactor/InsertionPoint.h clang-tools-extra/clangd/refactor/Rename.h clang-tools-extra/clangd/refactor/Tweak.cpp clang-tools-extra/clangd/refactor/Tweak.h @@ -1362,6 +1538,7 @@ clang-tools-extra/clangd/unittests/CanonicalIncludesTests.cpp clang-tools-extra/clangd/unittests/ClangdLSPServerTests.cpp clang-tools-extra/clangd/unittests/ClangdTests.cpp +clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp clang-tools-extra/clangd/unittests/CodeCompletionStringsTests.cpp clang-tools-extra/clangd/unittests/CollectMacrosTests.cpp clang-tools-extra/clangd/unittests/CompilerTests.cpp @@ -1378,6 +1555,7 @@ clang-tools-extra/clangd/unittests/FeatureModulesTests.cpp clang-tools-extra/clangd/unittests/FileDistanceTests.cpp clang-tools-extra/clangd/unittests/FileIndexTests.cpp +clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp clang-tools-extra/clangd/unittests/FindTargetTests.cpp 
clang-tools-extra/clangd/unittests/FSTests.cpp clang-tools-extra/clangd/unittests/FuzzyMatchTests.cpp @@ -1387,10 +1565,10 @@ clang-tools-extra/clangd/unittests/IncludeCleanerTests.cpp clang-tools-extra/clangd/unittests/IndexActionTests.cpp clang-tools-extra/clangd/unittests/InlayHintTests.cpp +clang-tools-extra/clangd/unittests/InsertionPointTests.cpp clang-tools-extra/clangd/unittests/LoggerTests.cpp clang-tools-extra/clangd/unittests/LSPBinderTests.cpp clang-tools-extra/clangd/unittests/LSPClient.cpp -clang-tools-extra/clangd/unittests/LSPClient.h clang-tools-extra/clangd/unittests/ModulesTests.cpp clang-tools-extra/clangd/unittests/ParsedASTTests.cpp clang-tools-extra/clangd/unittests/PreambleTests.cpp @@ -1417,7 +1595,6 @@ clang-tools-extra/clangd/unittests/TidyProviderTests.cpp clang-tools-extra/clangd/unittests/TypeHierarchyTests.cpp clang-tools-extra/clangd/unittests/URITests.cpp -clang-tools-extra/clangd/unittests/XRefsTests.cpp clang-tools-extra/clangd/unittests/decision_forest_model/CategoricalFeature.h clang-tools-extra/clangd/unittests/remote/MarshallingTests.cpp clang-tools-extra/clangd/unittests/support/CancellationTests.cpp @@ -1433,7 +1610,6 @@ clang-tools-extra/clangd/unittests/support/TraceTests.cpp clang-tools-extra/clangd/unittests/tweaks/AddUsingTests.cpp clang-tools-extra/clangd/unittests/tweaks/AnnotateHighlightingsTests.cpp -clang-tools-extra/clangd/unittests/tweaks/DefineInlineTests.cpp clang-tools-extra/clangd/unittests/tweaks/DefineOutlineTests.cpp clang-tools-extra/clangd/unittests/tweaks/DumpASTTests.cpp clang-tools-extra/clangd/unittests/tweaks/DumpRecordLayoutTests.cpp @@ -1477,6 +1653,7 @@ clang-tools-extra/unittests/clang-tidy/GlobListTest.cpp clang-tools-extra/unittests/clang-tidy/OptionsProviderTest.cpp clang-tools-extra/unittests/clang-tidy/OverlappingReplacementsTest.cpp +clang-tools-extra/unittests/clang-tidy/ReadabilityModuleTest.cpp clang-tools-extra/unittests/clang-tidy/TransformerClangTidyCheckTest.cpp 
compiler-rt/include/sanitizer/linux_syscall_hooks.h compiler-rt/include/sanitizer/memprof_interface.h @@ -1487,6 +1664,7 @@ compiler-rt/lib/asan/asan_lock.h compiler-rt/lib/asan/asan_mapping.h compiler-rt/lib/asan/asan_mapping_sparc64.h +compiler-rt/lib/asan/asan_rtl_static.cpp compiler-rt/lib/asan/tests/asan_globals_test.cpp compiler-rt/lib/builtins/fp_extend.h compiler-rt/lib/builtins/fp_lib.h @@ -1504,7 +1682,6 @@ compiler-rt/lib/dfsan/dfsan_chained_origin_depot.h compiler-rt/lib/dfsan/dfsan_flags.h compiler-rt/lib/dfsan/dfsan_interceptors.cpp -compiler-rt/lib/dfsan/dfsan_new_delete.cpp compiler-rt/lib/dfsan/dfsan_origin.h compiler-rt/lib/dfsan/dfsan_platform.h compiler-rt/lib/dfsan/dfsan_thread.h @@ -1576,6 +1753,7 @@ compiler-rt/lib/hwasan/hwasan_linux.cpp compiler-rt/lib/hwasan/hwasan_poisoning.cpp compiler-rt/lib/hwasan/hwasan_poisoning.h +compiler-rt/lib/hwasan/hwasan_preinit.cpp compiler-rt/lib/interception/interception_mac.cpp compiler-rt/lib/interception/tests/interception_test_main.cpp compiler-rt/lib/lsan/lsan.h @@ -1597,10 +1775,8 @@ compiler-rt/lib/memprof/memprof_internal.h compiler-rt/lib/memprof/memprof_linux.cpp compiler-rt/lib/memprof/memprof_malloc_linux.cpp -compiler-rt/lib/memprof/memprof_meminfoblock.h compiler-rt/lib/memprof/memprof_mibmap.cpp compiler-rt/lib/memprof/memprof_mibmap.h -compiler-rt/lib/memprof/memprof_new_delete.cpp compiler-rt/lib/memprof/memprof_posix.cpp compiler-rt/lib/memprof/memprof_preinit.cpp compiler-rt/lib/memprof/memprof_rawprofile.cpp @@ -1620,6 +1796,7 @@ compiler-rt/lib/msan/msan_poisoning.h compiler-rt/lib/msan/msan_report.h compiler-rt/lib/orc/adt.h +compiler-rt/lib/orc/debug.h compiler-rt/lib/orc/elfnix_platform.cpp compiler-rt/lib/orc/elfnix_platform.h compiler-rt/lib/orc/endianness.h @@ -1628,6 +1805,7 @@ compiler-rt/lib/orc/extensible_rtti.cpp compiler-rt/lib/orc/extensible_rtti.h compiler-rt/lib/orc/log_error_to_stderr.cpp +compiler-rt/lib/orc/macho_ehframe_registration.cpp 
compiler-rt/lib/orc/macho_platform.cpp compiler-rt/lib/orc/macho_platform.h compiler-rt/lib/orc/run_program_wrapper.cpp @@ -1653,6 +1831,7 @@ compiler-rt/lib/sanitizer_common/sanitizer_errno.h compiler-rt/lib/sanitizer_common/sanitizer_errno_codes.h compiler-rt/lib/sanitizer_common/sanitizer_flat_map.h +compiler-rt/lib/sanitizer_common/sanitizer_fuchsia.cpp compiler-rt/lib/sanitizer_common/sanitizer_leb128.h compiler-rt/lib/sanitizer_common/sanitizer_local_address_space_view.h compiler-rt/lib/sanitizer_common/sanitizer_lzw.h @@ -1731,7 +1910,6 @@ compiler-rt/lib/scudo/standalone/vector.h compiler-rt/lib/scudo/standalone/wrappers_c.cpp compiler-rt/lib/scudo/standalone/wrappers_c.h -compiler-rt/lib/scudo/standalone/wrappers_cpp.cpp compiler-rt/lib/scudo/standalone/wrappers_c_bionic.cpp compiler-rt/lib/scudo/standalone/wrappers_c_checks.h compiler-rt/lib/scudo/standalone/benchmarks/malloc_benchmark.cpp @@ -1758,7 +1936,6 @@ compiler-rt/lib/scudo/standalone/tests/size_class_map_test.cpp compiler-rt/lib/scudo/standalone/tests/stats_test.cpp compiler-rt/lib/scudo/standalone/tests/strings_test.cpp -compiler-rt/lib/scudo/standalone/tests/tsd_test.cpp compiler-rt/lib/scudo/standalone/tests/vector_test.cpp compiler-rt/lib/scudo/standalone/tests/wrappers_cpp_test.cpp compiler-rt/lib/scudo/standalone/tests/wrappers_c_test.cpp @@ -1837,6 +2014,7 @@ compiler-rt/lib/xray/tests/unit/xray_unit_test_main.cpp compiler-rt/tools/gwp_asan/options_parser_fuzzer.cpp compiler-rt/tools/gwp_asan/stack_trace_compressor_fuzzer.cpp +cross-project-tests/debuginfo-tests/clang_llvm_roundtrip/simplified_template_names_noncanonical_type_units.cpp cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/dex_and_source/test.cpp cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary/test.cpp 
cross-project-tests/debuginfo-tests/dexter/feature_tests/commands/perfect/dex_declare_file/precompiled_binary_different_dir/source/test.cpp @@ -1847,15 +2025,16 @@ cross-project-tests/debuginfo-tests/dexter-tests/realigned-frame.cpp cross-project-tests/debuginfo-tests/llvm-prettyprinters/gdb/llvm-support.cpp flang/examples/external-hello.cpp -flang/examples/flang-omp-report-plugin/flang-omp-report-visitor.cpp -flang/examples/flang-omp-report-plugin/flang-omp-report-visitor.h -flang/examples/flang-omp-report-plugin/flang-omp-report.cpp +flang/examples/FlangOmpReport/FlangOmpReport.cpp +flang/examples/FlangOmpReport/FlangOmpReportVisitor.cpp +flang/examples/FlangOmpReport/FlangOmpReportVisitor.h flang/examples/PrintFlangFunctionNames/PrintFlangFunctionNames.cpp flang/include/flang/ISO_Fortran_binding.h flang/include/flang/Common/bit-population-count.h flang/include/flang/Common/constexpr-bitset.h flang/include/flang/Common/default-kinds.h flang/include/flang/Common/enum-set.h +flang/include/flang/Common/fast-int-set.h flang/include/flang/Common/format.h flang/include/flang/Common/Fortran-features.h flang/include/flang/Common/Fortran.h @@ -1903,31 +2082,37 @@ flang/include/flang/Frontend/FrontendOptions.h flang/include/flang/Frontend/FrontendPluginRegistry.h flang/include/flang/Frontend/PreprocessorOptions.h +flang/include/flang/Frontend/TargetOptions.h flang/include/flang/Frontend/TextDiagnostic.h flang/include/flang/Frontend/TextDiagnosticBuffer.h flang/include/flang/Frontend/TextDiagnosticPrinter.h flang/include/flang/FrontendTool/Utils.h flang/include/flang/Lower/AbstractConverter.h +flang/include/flang/Lower/Allocatable.h +flang/include/flang/Lower/BoxAnalyzer.h flang/include/flang/Lower/Bridge.h -flang/include/flang/Lower/CharacterExpr.h -flang/include/flang/Lower/CharacterRuntime.h +flang/include/flang/Lower/CallInterface.h flang/include/flang/Lower/Coarray.h -flang/include/flang/Lower/ComplexExpr.h +flang/include/flang/Lower/ComponentPath.h 
+flang/include/flang/Lower/ConvertExpr.h flang/include/flang/Lower/ConvertType.h -flang/include/flang/Lower/DoLoopHelper.h -flang/include/flang/Lower/FIRBuilder.h +flang/include/flang/Lower/ConvertVariable.h +flang/include/flang/Lower/DumpEvaluateExpr.h +flang/include/flang/Lower/HostAssociations.h +flang/include/flang/Lower/IntervalSet.h flang/include/flang/Lower/IntrinsicCall.h flang/include/flang/Lower/IO.h +flang/include/flang/Lower/IterationSpace.h flang/include/flang/Lower/Mangler.h flang/include/flang/Lower/OpenACC.h flang/include/flang/Lower/OpenMP.h flang/include/flang/Lower/PFTBuilder.h flang/include/flang/Lower/PFTDefs.h flang/include/flang/Lower/Runtime.h +flang/include/flang/Lower/StatementContext.h flang/include/flang/Lower/Todo.h -flang/include/flang/Lower/Utils.h -flang/include/flang/Lower/Support/BoxValue.h flang/include/flang/Lower/Support/Utils.h +flang/include/flang/Lower/Support/Verifier.h flang/include/flang/Optimizer/Builder/BoxValue.h flang/include/flang/Optimizer/Builder/Character.h flang/include/flang/Optimizer/Builder/Complex.h @@ -1937,11 +2122,13 @@ flang/include/flang/Optimizer/Builder/MutableBox.h flang/include/flang/Optimizer/Builder/Runtime/Assign.h flang/include/flang/Optimizer/Builder/Runtime/Character.h +flang/include/flang/Optimizer/Builder/Runtime/Command.h flang/include/flang/Optimizer/Builder/Runtime/Derived.h flang/include/flang/Optimizer/Builder/Runtime/Numeric.h flang/include/flang/Optimizer/Builder/Runtime/Ragged.h flang/include/flang/Optimizer/Builder/Runtime/Reduction.h flang/include/flang/Optimizer/Builder/Runtime/RTBuilder.h +flang/include/flang/Optimizer/Builder/Runtime/Stop.h flang/include/flang/Optimizer/Builder/Runtime/Transformational.h flang/include/flang/Optimizer/CodeGen/CodeGen.h flang/include/flang/Optimizer/Dialect/FIRAttr.h @@ -1984,6 +2171,7 @@ flang/include/flang/Runtime/descriptor.h flang/include/flang/Runtime/entry-names.h flang/include/flang/Runtime/extensions.h +flang/include/flang/Runtime/inquiry.h 
flang/include/flang/Runtime/io-api.h flang/include/flang/Runtime/iostat.h flang/include/flang/Runtime/main.h @@ -2053,22 +2241,26 @@ flang/lib/Frontend/TextDiagnosticBuffer.cpp flang/lib/Frontend/TextDiagnosticPrinter.cpp flang/lib/FrontendTool/ExecuteCompilerInvocation.cpp -flang/lib/Lower/CharacterExpr.cpp -flang/lib/Lower/CharacterRuntime.cpp +flang/lib/Lower/Allocatable.cpp +flang/lib/Lower/Bridge.cpp +flang/lib/Lower/CallInterface.cpp flang/lib/Lower/Coarray.cpp -flang/lib/Lower/ComplexExpr.cpp +flang/lib/Lower/ComponentPath.cpp flang/lib/Lower/ConvertExpr.cpp flang/lib/Lower/ConvertType.cpp -flang/lib/Lower/DoLoopHelper.cpp -flang/lib/Lower/FIRBuilder.cpp +flang/lib/Lower/ConvertVariable.cpp +flang/lib/Lower/DumpEvaluateExpr.cpp flang/lib/Lower/IntervalSet.h flang/lib/Lower/IntrinsicCall.cpp flang/lib/Lower/IO.cpp +flang/lib/Lower/IterationSpace.cpp flang/lib/Lower/Mangler.cpp flang/lib/Lower/OpenACC.cpp flang/lib/Lower/OpenMP.cpp flang/lib/Lower/PFTBuilder.cpp flang/lib/Lower/RTBuilder.h +flang/lib/Lower/Runtime.cpp +flang/lib/Lower/SymbolMap.cpp flang/lib/Optimizer/Builder/BoxValue.cpp flang/lib/Optimizer/Builder/Character.cpp flang/lib/Optimizer/Builder/Complex.cpp @@ -2077,13 +2269,16 @@ flang/lib/Optimizer/Builder/MutableBox.cpp flang/lib/Optimizer/Builder/Runtime/Assign.cpp flang/lib/Optimizer/Builder/Runtime/Character.cpp +flang/lib/Optimizer/Builder/Runtime/Command.cpp flang/lib/Optimizer/Builder/Runtime/Derived.cpp flang/lib/Optimizer/Builder/Runtime/Numeric.cpp flang/lib/Optimizer/Builder/Runtime/Ragged.cpp flang/lib/Optimizer/Builder/Runtime/Reduction.cpp +flang/lib/Optimizer/Builder/Runtime/Stop.cpp flang/lib/Optimizer/Builder/Runtime/Transformational.cpp flang/lib/Optimizer/CodeGen/CGOps.cpp flang/lib/Optimizer/CodeGen/CGOps.h +flang/lib/Optimizer/CodeGen/CodeGen.cpp flang/lib/Optimizer/CodeGen/DescriptorModel.h flang/lib/Optimizer/CodeGen/PassDetail.h flang/lib/Optimizer/CodeGen/PreCGRewrite.cpp @@ -2091,10 +2286,13 @@ 
flang/lib/Optimizer/CodeGen/Target.h flang/lib/Optimizer/CodeGen/TargetRewrite.cpp flang/lib/Optimizer/CodeGen/TypeConverter.h +flang/lib/Optimizer/Dialect/FIRAttr.cpp flang/lib/Optimizer/Dialect/FIRDialect.cpp flang/lib/Optimizer/Dialect/FIROps.cpp flang/lib/Optimizer/Dialect/FIRType.cpp +flang/lib/Optimizer/Dialect/Inliner.cpp flang/lib/Optimizer/Support/FIRContext.cpp +flang/lib/Optimizer/Support/InitFIR.cpp flang/lib/Optimizer/Support/InternalNames.cpp flang/lib/Optimizer/Support/KindMapping.cpp flang/lib/Optimizer/Transforms/AbstractResult.cpp @@ -2103,7 +2301,6 @@ flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp flang/lib/Optimizer/Transforms/CharacterConversion.cpp flang/lib/Optimizer/Transforms/ExternalNameConversion.cpp -flang/lib/Optimizer/Transforms/Inliner.cpp flang/lib/Optimizer/Transforms/MemoryAllocation.cpp flang/lib/Optimizer/Transforms/MemRefDataFlowOpt.cpp flang/lib/Optimizer/Transforms/PassDetail.h @@ -2157,7 +2354,6 @@ flang/lib/Semantics/check-allocate.h flang/lib/Semantics/check-arithmeticif.cpp flang/lib/Semantics/check-arithmeticif.h -flang/lib/Semantics/check-call.cpp flang/lib/Semantics/check-call.h flang/lib/Semantics/check-case.cpp flang/lib/Semantics/check-case.h @@ -2167,7 +2363,6 @@ flang/lib/Semantics/check-data.h flang/lib/Semantics/check-deallocate.cpp flang/lib/Semantics/check-deallocate.h -flang/lib/Semantics/check-declarations.cpp flang/lib/Semantics/check-declarations.h flang/lib/Semantics/check-directive-structure.h flang/lib/Semantics/check-do-forall.cpp @@ -2195,7 +2390,6 @@ flang/lib/Semantics/compute-offsets.cpp flang/lib/Semantics/compute-offsets.h flang/lib/Semantics/data-to-inits.cpp -flang/lib/Semantics/data-to-inits.h flang/lib/Semantics/mod-file.h flang/lib/Semantics/pointer-assignment.cpp flang/lib/Semantics/pointer-assignment.h @@ -2213,7 +2407,6 @@ flang/lib/Semantics/runtime-type-info.cpp flang/lib/Semantics/scope.cpp flang/lib/Semantics/semantics.cpp -flang/lib/Semantics/symbol.cpp 
flang/lib/Semantics/tools.cpp flang/lib/Semantics/unparse-with-symbols.cpp flang/module/omp_lib.h @@ -2248,6 +2441,7 @@ flang/runtime/format-implementation.h flang/runtime/format.cpp flang/runtime/format.h +flang/runtime/inquiry.cpp flang/runtime/internal-unit.cpp flang/runtime/internal-unit.h flang/runtime/io-api.cpp @@ -2287,8 +2481,8 @@ flang/runtime/type-info.h flang/runtime/unit-map.cpp flang/runtime/unit-map.h -flang/runtime/unit.cpp flang/runtime/unit.h +flang/tools/bbc/bbc.cpp flang/tools/f18/dump.cpp flang/tools/f18-parse-demo/f18-parse-demo.cpp flang/tools/f18-parse-demo/stub-evaluate.cpp @@ -2296,6 +2490,7 @@ flang/tools/flang-driver/driver.cpp flang/tools/flang-driver/fc1_main.cpp flang/tools/tco/tco.cpp +flang/unittests/Common/FastIntSetTest.cpp flang/unittests/Decimal/quick-sanity-test.cpp flang/unittests/Decimal/thorough-test.cpp flang/unittests/Evaluate/bit-population-count.cpp @@ -2324,11 +2519,13 @@ flang/unittests/Optimizer/Builder/FIRBuilderTest.cpp flang/unittests/Optimizer/Builder/Runtime/AssignTest.cpp flang/unittests/Optimizer/Builder/Runtime/CharacterTest.cpp +flang/unittests/Optimizer/Builder/Runtime/CommandTest.cpp flang/unittests/Optimizer/Builder/Runtime/DerivedTest.cpp flang/unittests/Optimizer/Builder/Runtime/NumericTest.cpp flang/unittests/Optimizer/Builder/Runtime/RaggedTest.cpp flang/unittests/Optimizer/Builder/Runtime/ReductionTest.cpp flang/unittests/Optimizer/Builder/Runtime/RuntimeCallTestBase.h +flang/unittests/Optimizer/Builder/Runtime/StopTest.cpp flang/unittests/Optimizer/Builder/Runtime/TransformationalTest.cpp flang/unittests/Runtime/BufferTest.cpp flang/unittests/Runtime/CharacterTest.cpp @@ -2337,6 +2534,7 @@ flang/unittests/Runtime/CrashHandlerFixture.h flang/unittests/Runtime/ExternalIOTest.cpp flang/unittests/Runtime/Format.cpp +flang/unittests/Runtime/Inquiry.cpp flang/unittests/Runtime/ListInputTest.cpp flang/unittests/Runtime/Matmul.cpp flang/unittests/Runtime/MiscIntrinsic.cpp @@ -2390,8 +2588,38 @@ 
libc/fuzzing/string/strcmp_fuzz.cpp libc/fuzzing/string/strstr_fuzz.cpp libc/include/__llvm-libc-common.h -libc/include/__llvm-libc-stdc-types.h -libc/include/__posix-types.h +libc/include/llvm-libc-macros/fcntl-macros.h +libc/include/llvm-libc-macros/stdio-macros.h +libc/include/llvm-libc-macros/linux/fcntl-macros.h +libc/include/llvm-libc-types/cnd_t.h +libc/include/llvm-libc-types/div_t.h +libc/include/llvm-libc-types/double_t.h +libc/include/llvm-libc-types/fenv_t.h +libc/include/llvm-libc-types/fexcept_t.h +libc/include/llvm-libc-types/FILE.h +libc/include/llvm-libc-types/float_t.h +libc/include/llvm-libc-types/imaxdiv_t.h +libc/include/llvm-libc-types/ldiv_t.h +libc/include/llvm-libc-types/lldiv_t.h +libc/include/llvm-libc-types/mode_t.h +libc/include/llvm-libc-types/mtx_t.h +libc/include/llvm-libc-types/off_t.h +libc/include/llvm-libc-types/once_flag.h +libc/include/llvm-libc-types/size_t.h +libc/include/llvm-libc-types/ssize_t.h +libc/include/llvm-libc-types/struct_sigaction.h +libc/include/llvm-libc-types/struct_tm.h +libc/include/llvm-libc-types/thrd_start_t.h +libc/include/llvm-libc-types/thrd_t.h +libc/include/llvm-libc-types/time_t.h +libc/include/llvm-libc-types/__atexithandler_t.h +libc/include/llvm-libc-types/__bsearchcompare_t.h +libc/include/llvm-libc-types/__call_once_func_t.h +libc/include/llvm-libc-types/__futex_word.h +libc/include/llvm-libc-types/__mutex_type.h +libc/include/llvm-libc-types/__qsortcompare_t.h +libc/include/llvm-libc-types/__sighandler_t.h +libc/loader/linux/aarch64/start.cpp libc/loader/linux/x86_64/start.cpp libc/src/assert/__assert_fail.h libc/src/ctype/isalnum.cpp @@ -2428,9 +2656,14 @@ libc/src/ctype/toupper.h libc/src/errno/dummy_errno.cpp libc/src/errno/dummy_errno.h +libc/src/errno/errno.cpp libc/src/errno/llvmlibc_errno.h -libc/src/errno/__errno_location.cpp -libc/src/errno/__errno_location.h +libc/src/fcntl/creat.h +libc/src/fcntl/open.h +libc/src/fcntl/openat.h +libc/src/fcntl/linux/creat.cpp 
+libc/src/fcntl/linux/open.cpp +libc/src/fcntl/linux/openat.cpp libc/src/fenv/feclearexcept.cpp libc/src/fenv/feclearexcept.h libc/src/fenv/fedisableexcept.cpp @@ -2512,6 +2745,9 @@ libc/src/math/llround.h libc/src/math/llroundf.h libc/src/math/llroundl.h +libc/src/math/log10f.h +libc/src/math/log1pf.h +libc/src/math/log2f.h libc/src/math/logb.h libc/src/math/logbf.h libc/src/math/logbl.h @@ -2566,6 +2802,8 @@ libc/src/math/generic/ceil.cpp libc/src/math/generic/ceilf.cpp libc/src/math/generic/ceill.cpp +libc/src/math/generic/common_constants.cpp +libc/src/math/generic/common_constants.h libc/src/math/generic/copysign.cpp libc/src/math/generic/copysignf.cpp libc/src/math/generic/copysignl.cpp @@ -2609,6 +2847,9 @@ libc/src/math/generic/llround.cpp libc/src/math/generic/llroundf.cpp libc/src/math/generic/llroundl.cpp +libc/src/math/generic/log10f.cpp +libc/src/math/generic/log1pf.cpp +libc/src/math/generic/log2f.cpp libc/src/math/generic/logb.cpp libc/src/math/generic/logbf.cpp libc/src/math/generic/logbl.cpp @@ -2654,9 +2895,6 @@ libc/src/math/generic/truncl.cpp libc/src/math/x86_64/cos.cpp libc/src/math/x86_64/sin.cpp -libc/src/math/x86_64/sqrt.cpp -libc/src/math/x86_64/sqrtf.cpp -libc/src/math/x86_64/sqrtl.cpp libc/src/math/x86_64/tan.cpp libc/src/signal/raise.h libc/src/signal/sigaction.h @@ -2679,10 +2917,11 @@ libc/src/stdio/FILE.h libc/src/stdio/fwrite.cpp libc/src/stdio/fwrite.h -libc/src/stdlib/abort.cpp libc/src/stdlib/abort.h libc/src/stdlib/abs.cpp libc/src/stdlib/abs.h +libc/src/stdlib/atexit.cpp +libc/src/stdlib/atexit.h libc/src/stdlib/atof.cpp libc/src/stdlib/atof.h libc/src/stdlib/atoi.cpp @@ -2695,6 +2934,10 @@ libc/src/stdlib/bsearch.h libc/src/stdlib/div.cpp libc/src/stdlib/div.h +libc/src/stdlib/exit.cpp +libc/src/stdlib/exit.h +libc/src/stdlib/getenv.cpp +libc/src/stdlib/getenv.h libc/src/stdlib/labs.cpp libc/src/stdlib/labs.h libc/src/stdlib/ldiv.cpp @@ -2720,6 +2963,7 @@ libc/src/stdlib/strtoull.cpp libc/src/stdlib/strtoull.h 
libc/src/stdlib/_Exit.h +libc/src/stdlib/linux/abort.cpp libc/src/stdlib/linux/_Exit.cpp libc/src/string/bcmp.cpp libc/src/string/bcmp.h @@ -2783,7 +3027,6 @@ libc/src/string/strtok_r.cpp libc/src/string/strtok_r.h libc/src/string/memory_utils/bcmp_implementations.h -libc/src/string/memory_utils/elements.h libc/src/string/memory_utils/elements_aarch64.h libc/src/string/memory_utils/elements_x86.h libc/src/string/memory_utils/memcmp_implementations.h @@ -2793,25 +3036,30 @@ libc/src/sys/mman/mmap.h libc/src/sys/mman/munmap.h libc/src/sys/mman/linux/mmap.cpp -libc/src/sys/mman/linux/munmap.cpp +libc/src/sys/stat/mkdir.h +libc/src/sys/stat/mkdirat.h +libc/src/sys/stat/linux/mkdir.cpp +libc/src/sys/stat/linux/mkdirat.cpp libc/src/threads/call_once.h libc/src/threads/cnd_broadcast.h libc/src/threads/cnd_destroy.h libc/src/threads/cnd_init.h libc/src/threads/cnd_signal.h libc/src/threads/cnd_wait.h +libc/src/threads/mtx_destroy.cpp libc/src/threads/mtx_destroy.h +libc/src/threads/mtx_init.cpp libc/src/threads/mtx_init.h +libc/src/threads/mtx_lock.cpp libc/src/threads/mtx_lock.h +libc/src/threads/mtx_unlock.cpp libc/src/threads/mtx_unlock.h libc/src/threads/thrd_create.h libc/src/threads/thrd_join.h libc/src/threads/linux/call_once.cpp libc/src/threads/linux/CndVar.h +libc/src/threads/linux/cnd_wait.cpp libc/src/threads/linux/Futex.h -libc/src/threads/linux/mtx_destroy.cpp -libc/src/threads/linux/mtx_init.cpp -libc/src/threads/linux/Mutex.h libc/src/threads/linux/thrd_create.cpp libc/src/threads/linux/thrd_join.cpp libc/src/threads/linux/Thread.h @@ -2827,7 +3075,19 @@ libc/src/time/mktime.h libc/src/time/time_utils.cpp libc/src/time/time_utils.h +libc/src/unistd/close.h +libc/src/unistd/fsync.h +libc/src/unistd/read.h +libc/src/unistd/rmdir.h +libc/src/unistd/unlink.h +libc/src/unistd/unlinkat.h libc/src/unistd/write.h +libc/src/unistd/linux/close.cpp +libc/src/unistd/linux/fsync.cpp +libc/src/unistd/linux/read.cpp +libc/src/unistd/linux/rmdir.cpp 
+libc/src/unistd/linux/unlink.cpp +libc/src/unistd/linux/unlinkat.cpp libc/src/unistd/linux/write.cpp libc/src/__support/architectures.h libc/src/__support/common.h @@ -2841,43 +3101,50 @@ libc/src/__support/str_to_integer.h libc/src/__support/CPP/Array.h libc/src/__support/CPP/ArrayRef.h +libc/src/__support/CPP/atomic.h +libc/src/__support/CPP/Bit.h libc/src/__support/CPP/Bitset.h libc/src/__support/CPP/Functional.h libc/src/__support/CPP/Limits.h libc/src/__support/CPP/StringView.h -libc/src/__support/CPP/TypeTraits.h +libc/src/__support/CPP/Utility.h +libc/src/__support/CPP/vector.h +libc/src/__support/File/file.cpp +libc/src/__support/File/file.h libc/src/__support/FPUtil/BasicOperations.h libc/src/__support/FPUtil/DivisionAndRemainderOperations.h libc/src/__support/FPUtil/FEnvImpl.h -libc/src/__support/FPUtil/FEnvUtils.h libc/src/__support/FPUtil/FloatProperties.h libc/src/__support/FPUtil/FMA.h libc/src/__support/FPUtil/FPBits.h -libc/src/__support/FPUtil/Hypot.h libc/src/__support/FPUtil/ManipulationFunctions.h libc/src/__support/FPUtil/NearestIntegerOperations.h libc/src/__support/FPUtil/NormalFloat.h libc/src/__support/FPUtil/PlatformDefs.h libc/src/__support/FPUtil/PolyEval.h -libc/src/__support/FPUtil/Sqrt.h +libc/src/__support/FPUtil/sqrt.h libc/src/__support/FPUtil/UInt.h libc/src/__support/FPUtil/XFloat.h libc/src/__support/FPUtil/aarch64/FEnvImpl.h libc/src/__support/FPUtil/aarch64/FMA.h +libc/src/__support/FPUtil/aarch64/sqrt.h libc/src/__support/FPUtil/generic/FMA.h +libc/src/__support/FPUtil/generic/sqrt.h +libc/src/__support/FPUtil/generic/sqrt_80_bit_long_double.h libc/src/__support/FPUtil/x86_64/FEnvImpl.h libc/src/__support/FPUtil/x86_64/FMA.h -libc/src/__support/FPUtil/x86_64/LongDoubleBits.h libc/src/__support/FPUtil/x86_64/NextAfterLongDouble.h libc/src/__support/FPUtil/x86_64/PolyEval.h -libc/src/__support/FPUtil/x86_64/SqrtLongDouble.h +libc/src/__support/FPUtil/x86_64/sqrt.h libc/src/__support/OSUtil/io.h 
libc/src/__support/OSUtil/quick_exit.h libc/src/__support/OSUtil/syscall.h libc/src/__support/OSUtil/linux/io.h -libc/src/__support/OSUtil/linux/quick_exit.h libc/src/__support/OSUtil/linux/syscall.h +libc/src/__support/OSUtil/linux/aarch64/syscall.h libc/src/__support/OSUtil/linux/x86_64/syscall.h +libc/src/__support/threads/mutex.h +libc/src/__support/threads/linux/mutex.h libc/utils/HdrGen/Command.cpp libc/utils/HdrGen/Command.h libc/utils/HdrGen/Generator.cpp @@ -2909,8 +3176,9 @@ libc/utils/UnitTest/FPMatcher.h libc/utils/UnitTest/FuchsiaTest.h libc/utils/UnitTest/LibcTest.cpp -libc/utils/UnitTest/LibcTest.h libc/utils/UnitTest/LibcTestMain.cpp +libc/utils/UnitTest/MemoryMatcher.cpp +libc/utils/UnitTest/MemoryMatcher.h libc/utils/UnitTest/PlatformDefs.h libc/utils/UnitTest/Test.h libclc/generic/include/config.h @@ -3119,6 +3387,7 @@ libclc/generic/lib/math/ep_log.h libcxx/benchmarks/format.bench.cpp libcxx/benchmarks/formatted_size.bench.cpp +libcxx/benchmarks/formatter_float.bench.cpp libcxx/benchmarks/format_to.bench.cpp libcxx/benchmarks/format_to_n.bench.cpp libcxx/benchmarks/to_chars.bench.cpp @@ -3126,21 +3395,49 @@ libcxx/benchmarks/variant_visit_1.bench.cpp libcxx/benchmarks/variant_visit_2.bench.cpp libcxx/benchmarks/variant_visit_3.bench.cpp +libcxx/include/__algorithm/adjacent_find.h +libcxx/include/__algorithm/all_of.h +libcxx/include/__algorithm/any_of.h +libcxx/include/__algorithm/count.h +libcxx/include/__algorithm/count_if.h +libcxx/include/__algorithm/find.h +libcxx/include/__algorithm/find_first_of.h +libcxx/include/__algorithm/find_if.h +libcxx/include/__algorithm/find_if_not.h +libcxx/include/__algorithm/for_each.h +libcxx/include/__algorithm/for_each_n.h +libcxx/include/__algorithm/iter_swap.h +libcxx/include/__algorithm/mismatch.h +libcxx/include/__algorithm/none_of.h +libcxx/include/__algorithm/swap_ranges.h +libcxx/include/__compare/is_eq.h libcxx/include/__filesystem/file_time_type.h libcxx/include/__filesystem/file_type.h 
libcxx/include/__filesystem/space_info.h +libcxx/include/__format/formatter_floating_point.h +libcxx/include/__format/formatter_pointer.h libcxx/include/__memory/voidify.h libcxx/include/__numeric/exclusive_scan.h libcxx/include/__numeric/inclusive_scan.h libcxx/include/__numeric/reduce.h libcxx/include/__numeric/transform_reduce.h +libcxx/include/__random/default_random_engine.h +libcxx/include/__random/knuth_b.h +libcxx/include/__ranges/dangling.h +libcxx/include/__ranges/enable_borrowed_range.h libcxx/include/__support/ibm/gettod_zos.h libcxx/include/__support/ibm/nanosleep.h libcxx/include/__support/openbsd/xlocale.h libcxx/include/__support/solaris/floatingpoint.h libcxx/include/__support/solaris/wchar.h +libcxx/include/__utility/auto_cast.h +libcxx/include/__utility/declval.h +libcxx/include/__utility/forward.h +libcxx/include/__utility/move.h +libcxx/include/__utility/swap.h libcxx/src/chrono_system_time_init.h libcxx/src/format.cpp +libcxx/src/ios.instantiations.cpp libcxx/src/iostream_init.h libcxx/src/legacy_pointer_safety.cpp libcxx/src/utility.cpp @@ -3175,6 +3472,7 @@ lld/COFF/TypeMerger.h lld/COFF/Writer.h lld/Common/Args.cpp +lld/Common/CommonLinkerContext.cpp lld/Common/DWARF.cpp lld/Common/Memory.cpp lld/Common/Reproduce.cpp @@ -3213,6 +3511,8 @@ lld/ELF/Arch/SPARCV9.cpp lld/include/lld/Common/Args.h lld/include/lld/Common/Arrays.h +lld/include/lld/Common/CommonLinkerContext.h +lld/include/lld/Common/Driver.h lld/include/lld/Common/DWARF.h lld/include/lld/Common/Filesystem.h lld/include/lld/Common/Strings.h @@ -3251,6 +3551,8 @@ lld/MachO/OutputSegment.h lld/MachO/Relocations.cpp lld/MachO/Relocations.h +lld/MachO/SectionPriorities.cpp +lld/MachO/SectionPriorities.h lld/MachO/Symbols.cpp lld/MachO/Symbols.h lld/MachO/SymbolTable.cpp @@ -3370,7 +3672,6 @@ lldb/include/lldb/Core/Debugger.h lldb/include/lldb/Core/Declaration.h lldb/include/lldb/Core/DumpRegisterValue.h -lldb/include/lldb/Core/dwarf.h lldb/include/lldb/Core/EmulateInstruction.h 
lldb/include/lldb/Core/Highlighter.h lldb/include/lldb/Core/IOHandlerCursesGUI.h @@ -3417,6 +3718,7 @@ lldb/include/lldb/Host/File.h lldb/include/lldb/Host/FileAction.h lldb/include/lldb/Host/FileSystem.h +lldb/include/lldb/Host/Host.h lldb/include/lldb/Host/HostGetOpt.h lldb/include/lldb/Host/HostInfo.h lldb/include/lldb/Host/HostNativeProcess.h @@ -3429,7 +3731,6 @@ lldb/include/lldb/Host/PseudoTerminal.h lldb/include/lldb/Host/SafeMachO.h lldb/include/lldb/Host/Socket.h -lldb/include/lldb/Host/SocketAddress.h lldb/include/lldb/Host/Terminal.h lldb/include/lldb/Host/Time.h lldb/include/lldb/Host/XML.h @@ -3511,6 +3812,7 @@ lldb/include/lldb/Target/JITLoader.h lldb/include/lldb/Target/JITLoaderList.h lldb/include/lldb/Target/MemoryTagManager.h +lldb/include/lldb/Target/MemoryTagMap.h lldb/include/lldb/Target/ModuleCache.h lldb/include/lldb/Target/OperatingSystem.h lldb/include/lldb/Target/PostMortemProcess.h @@ -3564,9 +3866,10 @@ lldb/include/lldb/Utility/FileSpec.h lldb/include/lldb/Utility/Flags.h lldb/include/lldb/Utility/GDBRemote.h +lldb/include/lldb/Utility/Instrumentation.h lldb/include/lldb/Utility/IOObject.h lldb/include/lldb/Utility/LLDBAssert.h -lldb/include/lldb/Utility/Logging.h +lldb/include/lldb/Utility/LLDBLog.h lldb/include/lldb/Utility/Predicate.h lldb/include/lldb/Utility/ProcessInfo.h lldb/include/lldb/Utility/RangeMap.h @@ -3593,14 +3896,45 @@ lldb/include/lldb/Utility/VASPrintf.h lldb/include/lldb/Utility/VMRange.h lldb/include/lldb/Version/Version.h +lldb/source/API/SBAddress.cpp +lldb/source/API/SBAttachInfo.cpp +lldb/source/API/SBBroadcaster.cpp lldb/source/API/SBCommandInterpreterRunOptions.cpp +lldb/source/API/SBCommunication.cpp +lldb/source/API/SBCompileUnit.cpp +lldb/source/API/SBDebugger.cpp +lldb/source/API/SBEnvironment.cpp +lldb/source/API/SBFile.cpp +lldb/source/API/SBFileSpec.cpp +lldb/source/API/SBFileSpecList.cpp +lldb/source/API/SBFunction.cpp +lldb/source/API/SBHostOS.cpp +lldb/source/API/SBLanguageRuntime.cpp 
+lldb/source/API/SBLaunchInfo.cpp +lldb/source/API/SBLineEntry.cpp +lldb/source/API/SBListener.cpp lldb/source/API/SBModule.cpp -lldb/source/API/SBReproducerPrivate.h +lldb/source/API/SBModuleSpec.cpp +lldb/source/API/SBProcessInfo.cpp +lldb/source/API/SBQueueItem.cpp +lldb/source/API/SBSection.cpp +lldb/source/API/SBStream.cpp +lldb/source/API/SBStringList.cpp +lldb/source/API/SBSymbol.cpp +lldb/source/API/SBSymbolContext.cpp +lldb/source/API/SBThreadPlan.cpp +lldb/source/API/SBTrace.cpp +lldb/source/API/SBTypeFilter.cpp +lldb/source/API/SBTypeFormat.cpp +lldb/source/API/SBUnixSignals.cpp +lldb/source/API/SBValueList.cpp +lldb/source/API/SBWatchpoint.cpp lldb/source/API/SystemInitializerFull.cpp lldb/source/API/SystemInitializerFull.h lldb/source/API/Utils.h lldb/source/Breakpoint/BreakpointList.cpp lldb/source/Breakpoint/BreakpointPrecondition.cpp +lldb/source/Breakpoint/BreakpointResolverAddress.cpp lldb/source/Breakpoint/BreakpointSiteList.cpp lldb/source/Breakpoint/StoppointCallbackContext.cpp lldb/source/Breakpoint/WatchpointList.cpp @@ -3666,6 +4000,7 @@ lldb/source/Core/AddressResolverFileLine.cpp lldb/source/Core/Communication.cpp lldb/source/Core/Declaration.cpp +lldb/source/Core/DumpDataExtractor.cpp lldb/source/Core/DumpRegisterValue.cpp lldb/source/Core/EmulateInstruction.cpp lldb/source/Core/FileLineResolver.cpp @@ -3716,6 +4051,7 @@ lldb/source/Host/freebsd/HostInfoFreeBSD.cpp lldb/source/Host/linux/AbstractSocket.cpp lldb/source/Host/linux/Host.cpp +lldb/source/Host/linux/HostInfoLinux.cpp lldb/source/Host/linux/LibcGlue.cpp lldb/source/Host/linux/Support.cpp lldb/source/Host/macosx/cfcpp/CFCBundle.cpp @@ -3732,6 +4068,7 @@ lldb/source/Host/macosx/cfcpp/CoreFoundationCPP.h lldb/source/Host/macosx/objcxx/PosixSpawnResponsible.h lldb/source/Host/openbsd/HostInfoOpenBSD.cpp +lldb/source/Host/posix/ConnectionFileDescriptorPosix.cpp lldb/source/Host/posix/FileSystemPosix.cpp lldb/source/Host/posix/HostInfoPosix.cpp 
lldb/source/Host/posix/HostThreadPosix.cpp @@ -3881,7 +4218,6 @@ lldb/source/Plugins/Language/ObjCPlusPlus/ObjCPlusPlusLanguage.h lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCClassDescriptorV2.h lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCDeclVendor.h -lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.cpp lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCRuntimeV2.h lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.cpp lldb/source/Plugins/LanguageRuntime/ObjC/AppleObjCRuntime/AppleObjCTypeEncodingParser.h @@ -3963,18 +4299,16 @@ lldb/source/Plugins/Process/FreeBSD/NativeThreadFreeBSD.cpp lldb/source/Plugins/Process/FreeBSD/NativeThreadFreeBSD.h lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.cpp -lldb/source/Plugins/Process/FreeBSDKernel/ProcessFreeBSDKernel.h lldb/source/Plugins/Process/FreeBSDKernel/RegisterContextFreeBSDKernel_arm64.cpp lldb/source/Plugins/Process/FreeBSDKernel/RegisterContextFreeBSDKernel_arm64.h lldb/source/Plugins/Process/FreeBSDKernel/RegisterContextFreeBSDKernel_i386.h lldb/source/Plugins/Process/FreeBSDKernel/RegisterContextFreeBSDKernel_x86_64.cpp lldb/source/Plugins/Process/FreeBSDKernel/RegisterContextFreeBSDKernel_x86_64.h +lldb/source/Plugins/Process/FreeBSDKernel/ThreadFreeBSDKernel.cpp lldb/source/Plugins/Process/FreeBSDKernel/ThreadFreeBSDKernel.h lldb/source/Plugins/Process/gdb-remote/GDBRemoteClientBase.h lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunication.h lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationHistory.h -lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationReplayServer.cpp -lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationReplayServer.h lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.cpp lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServer.h 
lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerCommon.h @@ -3982,6 +4316,9 @@ lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.cpp lldb/source/Plugins/Process/gdb-remote/GDBRemoteCommunicationServerPlatform.h lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterContext.h +lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterFallback.cpp +lldb/source/Plugins/Process/gdb-remote/GDBRemoteRegisterFallback.h +lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemoteLog.h lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.cpp lldb/source/Plugins/Process/gdb-remote/ThreadGDBRemote.h lldb/source/Plugins/Process/Linux/IntelPTManager.cpp @@ -4030,6 +4367,8 @@ lldb/source/Plugins/Process/POSIX/NativeProcessELF.h lldb/source/Plugins/Process/POSIX/ProcessMessage.cpp lldb/source/Plugins/Process/POSIX/ProcessMessage.h +lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.cpp +lldb/source/Plugins/Process/POSIX/ProcessPOSIXLog.h lldb/source/Plugins/Process/scripted/ScriptedProcess.cpp lldb/source/Plugins/Process/scripted/ScriptedProcess.h lldb/source/Plugins/Process/scripted/ScriptedThread.cpp @@ -4207,6 +4546,7 @@ lldb/source/Plugins/SymbolFile/DWARF/DWARFTypeUnit.h lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.cpp lldb/source/Plugins/SymbolFile/DWARF/HashedNameToDIE.h +lldb/source/Plugins/SymbolFile/DWARF/LogChannelDWARF.h lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.h lldb/source/Plugins/SymbolFile/DWARF/NameToDIE.cpp lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -4293,6 +4633,7 @@ lldb/source/Target/Language.cpp lldb/source/Target/MemoryHistory.cpp lldb/source/Target/MemoryRegionInfo.cpp +lldb/source/Target/MemoryTagMap.cpp lldb/source/Target/ModuleCache.cpp lldb/source/Target/OperatingSystem.cpp lldb/source/Target/ProcessTrace.cpp @@ -4302,11 +4643,11 @@ lldb/source/Target/SectionLoadHistory.cpp lldb/source/Target/SectionLoadList.cpp lldb/source/Target/StackID.cpp 
-lldb/source/Target/Statistics.cpp lldb/source/Target/SystemRuntime.cpp lldb/source/Target/ThreadCollection.cpp lldb/source/Target/ThreadPlanCallFunctionUsingABI.cpp lldb/source/Target/ThreadPlanCallOnFunctionExit.cpp +lldb/source/Target/ThreadPlanCallUserExpression.cpp lldb/source/Target/ThreadPlanRunToAddress.cpp lldb/source/Target/ThreadPlanShouldStopHere.cpp lldb/source/Target/ThreadPlanStepInRange.cpp @@ -4332,15 +4673,16 @@ lldb/source/Utility/DataExtractor.cpp lldb/source/Utility/Environment.cpp lldb/source/Utility/GDBRemote.cpp +lldb/source/Utility/Instrumentation.cpp lldb/source/Utility/IOObject.cpp lldb/source/Utility/Listener.cpp lldb/source/Utility/LLDBAssert.cpp +lldb/source/Utility/LLDBLog.cpp lldb/source/Utility/NameMatches.cpp lldb/source/Utility/PPC64LE_DWARF_Registers.h lldb/source/Utility/PPC64_DWARF_Registers.h lldb/source/Utility/RegularExpression.cpp lldb/source/Utility/Reproducer.cpp -lldb/source/Utility/ReproducerInstrumentation.cpp lldb/source/Utility/ReproducerProvider.cpp lldb/source/Utility/State.cpp lldb/source/Utility/Status.cpp @@ -4460,7 +4802,6 @@ lldb/tools/lldb-vscode/SourceBreakpoint.cpp lldb/tools/lldb-vscode/SourceBreakpoint.h lldb/tools/lldb-vscode/SourceReference.h -lldb/tools/lldb-vscode/VSCode.cpp lldb/tools/lldb-vscode/VSCode.h lldb/tools/lldb-vscode/VSCodeForward.h lldb/unittests/gtest_common.h @@ -4495,13 +4836,16 @@ lldb/unittests/Host/ProcessLaunchInfoTest.cpp lldb/unittests/Host/SocketAddressTest.cpp lldb/unittests/Host/SocketTestUtilities.h +lldb/unittests/Host/ThreadLauncherTest.cpp lldb/unittests/Host/linux/HostTest.cpp lldb/unittests/Host/linux/SupportTest.cpp lldb/unittests/Interpreter/TestOptionValueFileColonLine.cpp +lldb/unittests/Interpreter/TestRegexCommand.cpp lldb/unittests/Language/CLanguages/CLanguagesTest.cpp lldb/unittests/Language/Highlighting/HighlighterTest.cpp lldb/unittests/ObjectFile/Breakpad/BreakpadRecordsTest.cpp lldb/unittests/Platform/PlatformDarwinTest.cpp 
+lldb/unittests/Platform/PlatformSiginfoTest.cpp lldb/unittests/Process/ProcessEventDataTest.cpp lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationServerLLGSTest.cpp lldb/unittests/Process/gdb-remote/GDBRemoteCommunicationServerTest.cpp @@ -4541,6 +4885,7 @@ lldb/unittests/Target/DynamicRegisterInfoTest.cpp lldb/unittests/Target/ExecutionContextTest.cpp lldb/unittests/Target/FindFileTest.cpp +lldb/unittests/Target/MemoryTagMapTest.cpp lldb/unittests/Target/RemoteAwarePlatformTest.cpp lldb/unittests/Target/StackFrameRecognizerTest.cpp lldb/unittests/TestingSupport/MockTildeExpressionResolver.cpp @@ -4635,17 +4980,16 @@ llvm/include/llvm/ADT/ilist_iterator.h llvm/include/llvm/ADT/ilist_node.h llvm/include/llvm/ADT/ilist_node_base.h -llvm/include/llvm/ADT/ilist_node_options.h llvm/include/llvm/ADT/IntrusiveRefCntPtr.h llvm/include/llvm/ADT/PointerEmbeddedInt.h llvm/include/llvm/ADT/ScopeExit.h llvm/include/llvm/ADT/Sequence.h llvm/include/llvm/ADT/simple_ilist.h llvm/include/llvm/ADT/Statistic.h +llvm/include/llvm/ADT/STLArrayExtras.h llvm/include/llvm/ADT/STLForwardCompat.h llvm/include/llvm/ADT/StringSet.h llvm/include/llvm/ADT/TypeSwitch.h -llvm/include/llvm/ADT/Waymarking.h llvm/include/llvm/Analysis/BlockFrequencyInfo.h llvm/include/llvm/Analysis/CFLAliasAnalysisUtils.h llvm/include/llvm/Analysis/CFLAndersAliasAnalysis.h @@ -4669,6 +5013,7 @@ llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h llvm/include/llvm/Analysis/InstCount.h llvm/include/llvm/Analysis/InstructionSimplify.h +llvm/include/llvm/Analysis/InstSimplifyFolder.h llvm/include/llvm/Analysis/IteratedDominanceFrontier.h llvm/include/llvm/Analysis/Lint.h llvm/include/llvm/Analysis/LoopCacheAnalysis.h @@ -4685,9 +5030,9 @@ llvm/include/llvm/Analysis/ObjCARCUtil.h llvm/include/llvm/Analysis/OverflowInstAnalysis.h llvm/include/llvm/Analysis/PhiValues.h -llvm/include/llvm/Analysis/ReleaseModeModelRunner.h llvm/include/llvm/Analysis/ReplayInlineAdvisor.h 
llvm/include/llvm/Analysis/ScalarEvolutionDivision.h +llvm/include/llvm/Analysis/ScalarEvolutionExpressions.h llvm/include/llvm/Analysis/ScalarEvolutionNormalization.h llvm/include/llvm/Analysis/ScopedNoAliasAA.h llvm/include/llvm/Analysis/StackLifetime.h @@ -4700,17 +5045,16 @@ llvm/include/llvm/Analysis/Utils/TFUtils.h llvm/include/llvm/AsmParser/LLToken.h llvm/include/llvm/AsmParser/SlotMapping.h -llvm/include/llvm/BinaryFormat/AMDGPUMetadataVerifier.h llvm/include/llvm/BinaryFormat/COFF.h llvm/include/llvm/BinaryFormat/Magic.h llvm/include/llvm/BinaryFormat/Minidump.h llvm/include/llvm/BinaryFormat/MsgPackDocument.h llvm/include/llvm/BinaryFormat/MsgPackReader.h llvm/include/llvm/BinaryFormat/MsgPackWriter.h +llvm/include/llvm/BinaryFormat/Swift.h llvm/include/llvm/BinaryFormat/WasmTraits.h llvm/include/llvm/Bitcode/BitcodeAnalyzer.h llvm/include/llvm/Bitcode/BitcodeCommon.h -llvm/include/llvm/Bitcode/BitcodeConvenience.h llvm/include/llvm/CodeGen/AsmPrinter.h llvm/include/llvm/CodeGen/AsmPrinterHandler.h llvm/include/llvm/CodeGen/BasicBlockSectionUtils.h @@ -4737,7 +5081,6 @@ llvm/include/llvm/CodeGen/MachineLoopUtils.h llvm/include/llvm/CodeGen/MachineModuleInfoImpls.h llvm/include/llvm/CodeGen/MachineModuleSlotTracker.h -llvm/include/llvm/CodeGen/MachineOutliner.h llvm/include/llvm/CodeGen/MachinePassManager.h llvm/include/llvm/CodeGen/MachineRegionInfo.h llvm/include/llvm/CodeGen/MachineSSAContext.h @@ -4751,6 +5094,7 @@ llvm/include/llvm/CodeGen/PBQPRAConstraint.h llvm/include/llvm/CodeGen/PreISelIntrinsicLowering.h llvm/include/llvm/CodeGen/RegisterBank.h +llvm/include/llvm/CodeGen/RegisterBankInfo.h llvm/include/llvm/CodeGen/RegisterClassInfo.h llvm/include/llvm/CodeGen/ReplaceWithVeclib.h llvm/include/llvm/CodeGen/ScheduleDAGMutation.h @@ -4909,9 +5253,11 @@ llvm/include/llvm/DebugInfo/PDB/Native/RawError.h llvm/include/llvm/DebugInfo/PDB/Native/RawTypes.h llvm/include/llvm/DebugInfo/PDB/Native/TpiHashing.h 
+llvm/include/llvm/DebugInfo/Symbolize/DIFetcher.h llvm/include/llvm/DebugInfo/Symbolize/DIPrinter.h llvm/include/llvm/DebugInfo/Symbolize/Symbolize.h llvm/include/llvm/Debuginfod/Debuginfod.h +llvm/include/llvm/Debuginfod/DIFetcher.h llvm/include/llvm/Debuginfod/HTTPClient.h llvm/include/llvm/Demangle/Demangle.h llvm/include/llvm/Demangle/StringView.h @@ -4926,6 +5272,7 @@ llvm/include/llvm/ExecutionEngine/GenericValue.h llvm/include/llvm/ExecutionEngine/SectionMemoryManager.h llvm/include/llvm/ExecutionEngine/JITLink/aarch64.h +llvm/include/llvm/ExecutionEngine/JITLink/EHFrameSupport.h llvm/include/llvm/ExecutionEngine/JITLink/ELF.h llvm/include/llvm/ExecutionEngine/JITLink/ELF_aarch64.h llvm/include/llvm/ExecutionEngine/JITLink/ELF_riscv.h @@ -4960,6 +5307,7 @@ llvm/include/llvm/ExecutionEngine/Orc/MachOPlatform.h llvm/include/llvm/ExecutionEngine/Orc/Mangling.h llvm/include/llvm/ExecutionEngine/Orc/ObjectFileInterface.h +llvm/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h llvm/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h llvm/include/llvm/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.h @@ -5022,6 +5370,7 @@ llvm/include/llvm/MC/MCAsmInfoXCOFF.h llvm/include/llvm/MC/MCAsmLayout.h llvm/include/llvm/MC/MCCodeView.h +llvm/include/llvm/MC/MCContext.h llvm/include/llvm/MC/MCFixedLenDisassembler.h llvm/include/llvm/MC/MCLabel.h llvm/include/llvm/MC/MCObjectWriter.h @@ -5063,18 +5412,19 @@ llvm/include/llvm/MCA/Stages/MicroOpQueueStage.h llvm/include/llvm/MCA/Stages/RetireStage.h llvm/include/llvm/MCA/Stages/Stage.h -llvm/include/llvm/ObjCopy/MultiFormatConfig.h -llvm/include/llvm/ObjCopy/ConfigManager.h llvm/include/llvm/ObjCopy/CommonConfig.h +llvm/include/llvm/ObjCopy/MultiFormatConfig.h llvm/include/llvm/ObjCopy/ObjCopy.h -llvm/include/llvm/ObjCopy/wasm/WasmConfig.h -llvm/include/llvm/ObjCopy/wasm/WasmObjcopy.h +llvm/include/llvm/ObjCopy/COFF/COFFConfig.h 
+llvm/include/llvm/ObjCopy/COFF/COFFObjcopy.h llvm/include/llvm/ObjCopy/ELF/ELFConfig.h llvm/include/llvm/ObjCopy/ELF/ELFObjcopy.h -llvm/include/llvm/ObjCopy/MachO/MachOObjcopy.h llvm/include/llvm/ObjCopy/MachO/MachOConfig.h -llvm/include/llvm/ObjCopy/COFF/COFFConfig.h -llvm/include/llvm/ObjCopy/COFF/COFFObjcopy.h +llvm/include/llvm/ObjCopy/MachO/MachOObjcopy.h +llvm/include/llvm/ObjCopy/wasm/WasmConfig.h +llvm/include/llvm/ObjCopy/wasm/WasmObjcopy.h +llvm/include/llvm/ObjCopy/XCOFF/XCOFFConfig.h +llvm/include/llvm/ObjCopy/XCOFF/XCOFFObjcopy.h llvm/include/llvm/Object/Archive.h llvm/include/llvm/Object/COFFModuleDefinition.h llvm/include/llvm/Object/Decompressor.h @@ -5140,6 +5490,9 @@ llvm/include/llvm/Support/CFGUpdate.h llvm/include/llvm/Support/CodeGenCoverage.h llvm/include/llvm/Support/CRC.h +llvm/include/llvm/Support/CSKYAttributeParser.h +llvm/include/llvm/Support/CSKYAttributes.h +llvm/include/llvm/Support/CSKYTargetParser.h llvm/include/llvm/Support/DataTypes.h llvm/include/llvm/Support/DebugCounter.h llvm/include/llvm/Support/Discriminator.h @@ -5179,17 +5532,16 @@ llvm/include/llvm/Support/SymbolRemappingReader.h llvm/include/llvm/Support/SystemUtils.h llvm/include/llvm/Support/TargetParser.h -llvm/include/llvm/Support/TimeProfiler.h llvm/include/llvm/Support/TrailingObjects.h llvm/include/llvm/Support/Unicode.h llvm/include/llvm/Support/UnicodeCharRanges.h llvm/include/llvm/Support/VersionTuple.h -llvm/include/llvm/Support/VirtualFileSystem.h llvm/include/llvm/Support/WindowsError.h llvm/include/llvm/Support/WithColor.h llvm/include/llvm/Support/FileSystem/UniqueID.h llvm/include/llvm/Support/Solaris/sys/regset.h llvm/include/llvm/TableGen/DirectiveEmitter.h +llvm/include/llvm/TableGen/Parser.h llvm/include/llvm/TableGen/StringToOffsetTable.h llvm/include/llvm/Target/CGPassBuilderOption.h llvm/include/llvm/Target/CodeGenCWrappers.h @@ -5223,6 +5575,7 @@ llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h 
llvm/include/llvm/Transforms/Instrumentation/SanitizerCoverage.h llvm/include/llvm/Transforms/IPO/Annotation2Metadata.h +llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h llvm/include/llvm/Transforms/IPO/Attributor.h llvm/include/llvm/Transforms/IPO/BlockExtractor.h llvm/include/llvm/Transforms/IPO/CalledValuePropagation.h @@ -5323,6 +5676,7 @@ llvm/include/llvm/Transforms/Utils/LowerSwitch.h llvm/include/llvm/Transforms/Utils/MatrixUtils.h llvm/include/llvm/Transforms/Utils/Mem2Reg.h +llvm/include/llvm/Transforms/Utils/MemoryTaggingSupport.h llvm/include/llvm/Transforms/Utils/MetaRenamer.h llvm/include/llvm/Transforms/Utils/NameAnonGlobals.h llvm/include/llvm/Transforms/Utils/RelLookupTableConverter.h @@ -5340,7 +5694,6 @@ llvm/include/llvm/Transforms/Utils/UnifyLoopExits.h llvm/include/llvm/Transforms/Utils/ValueMapper.h llvm/include/llvm/Transforms/Vectorize/SLPVectorizer.h -llvm/include/llvm/WindowsDriver/MSVCPaths.h llvm/include/llvm/WindowsDriver/MSVCSetupApi.h llvm/include/llvm/WindowsManifest/WindowsManifestMerger.h llvm/include/llvm/WindowsResource/ResourceScriptToken.h @@ -5413,6 +5766,7 @@ llvm/lib/Analysis/ValueLatticeUtils.cpp llvm/lib/Analysis/VFABIDemangling.cpp llvm/lib/AsmParser/Parser.cpp +llvm/lib/BinaryFormat/COFF.cpp llvm/lib/BinaryFormat/ELF.cpp llvm/lib/BinaryFormat/MachO.cpp llvm/lib/BinaryFormat/Magic.cpp @@ -5437,6 +5791,7 @@ llvm/lib/CodeGen/FixupStatepointCallerSaved.cpp llvm/lib/CodeGen/GCMetadataPrinter.cpp llvm/lib/CodeGen/IndirectBrExpandPass.cpp +llvm/lib/CodeGen/JMCInstrumenter.cpp llvm/lib/CodeGen/LiveDebugVariables.h llvm/lib/CodeGen/LiveIntervalCalc.cpp llvm/lib/CodeGen/LiveRangeShrink.cpp @@ -5462,6 +5817,7 @@ llvm/lib/CodeGen/MIRSampleProfile.cpp llvm/lib/CodeGen/MIRVRegNamerUtils.cpp llvm/lib/CodeGen/MIRYamlMapping.cpp +llvm/lib/CodeGen/MLRegallocEvictAdvisor.cpp llvm/lib/CodeGen/MultiHazardRecognizer.cpp llvm/lib/CodeGen/NonRelocatableStringpool.cpp llvm/lib/CodeGen/ParallelCG.cpp @@ -5470,6 +5826,7 @@ 
llvm/lib/CodeGen/RegAllocBase.cpp llvm/lib/CodeGen/RegAllocEvictionAdvisor.cpp llvm/lib/CodeGen/RegAllocEvictionAdvisor.h +llvm/lib/CodeGen/RegAllocGreedy.h llvm/lib/CodeGen/RegAllocScore.cpp llvm/lib/CodeGen/RegAllocScore.h llvm/lib/CodeGen/RemoveRedundantDebugValues.cpp @@ -5509,7 +5866,6 @@ llvm/lib/CodeGen/GlobalISel/Localizer.cpp llvm/lib/CodeGen/GlobalISel/LostDebugLocObserver.cpp llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.cpp -llvm/lib/CodeGen/LiveDebugValues/LiveDebugValues.h llvm/lib/CodeGen/MIRParser/MILexer.h llvm/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp llvm/lib/CodeGen/SelectionDAG/StatepointLowering.h @@ -5557,6 +5913,7 @@ llvm/lib/DebugInfo/PDB/PDBSymbol.cpp llvm/lib/DebugInfo/PDB/PDBSymbolAnnotation.cpp llvm/lib/DebugInfo/PDB/PDBSymbolBlock.cpp +llvm/lib/DebugInfo/PDB/PDBSymbolCompiland.cpp llvm/lib/DebugInfo/PDB/PDBSymbolCompilandDetails.cpp llvm/lib/DebugInfo/PDB/PDBSymbolCompilandEnv.cpp llvm/lib/DebugInfo/PDB/PDBSymbolCustom.cpp @@ -5636,9 +5993,11 @@ llvm/lib/DebugInfo/PDB/Native/SymbolStream.cpp llvm/lib/DebugInfo/PDB/Native/TpiHashing.cpp llvm/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp +llvm/lib/DebugInfo/Symbolize/DIFetcher.cpp llvm/lib/DebugInfo/Symbolize/DIPrinter.cpp llvm/lib/DebugInfo/Symbolize/Symbolize.cpp llvm/lib/Debuginfod/Debuginfod.cpp +llvm/lib/Debuginfod/DIFetcher.cpp llvm/lib/Debuginfod/HTTPClient.cpp llvm/lib/Demangle/Demangle.cpp llvm/lib/Demangle/DLangDemangle.cpp @@ -5652,10 +6011,8 @@ llvm/lib/ExecutionEngine/SectionMemoryManager.cpp llvm/lib/ExecutionEngine/JITLink/aarch64.cpp llvm/lib/ExecutionEngine/JITLink/DefineExternalSectionStartAndEndSymbols.h -llvm/lib/ExecutionEngine/JITLink/EHFrameSupport.cpp llvm/lib/ExecutionEngine/JITLink/ELF.cpp llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.cpp -llvm/lib/ExecutionEngine/JITLink/ELFLinkGraphBuilder.h llvm/lib/ExecutionEngine/JITLink/ELF_aarch64.cpp llvm/lib/ExecutionEngine/JITLink/ELF_x86_64.cpp llvm/lib/ExecutionEngine/JITLink/JITLink.cpp @@ 
-5689,6 +6046,7 @@ llvm/lib/ExecutionEngine/Orc/SpeculateAnalyses.cpp llvm/lib/ExecutionEngine/Orc/Speculation.cpp llvm/lib/ExecutionEngine/Orc/TaskDispatch.cpp +llvm/lib/ExecutionEngine/Orc/Shared/AllocationActions.cpp llvm/lib/ExecutionEngine/Orc/Shared/OrcError.cpp llvm/lib/ExecutionEngine/Orc/Shared/OrcRTBridge.cpp llvm/lib/ExecutionEngine/Orc/Shared/SimpleRemoteEPCUtils.cpp @@ -5779,38 +6137,43 @@ llvm/lib/MCA/Stages/RetireStage.cpp llvm/lib/MCA/Stages/Stage.cpp llvm/lib/ObjCopy/Archive.cpp -llvm/lib/ObjCopy/ConfigManager.cpp -llvm/lib/ObjCopy/ObjCopy.cpp llvm/lib/ObjCopy/Archive.h -llvm/lib/ObjCopy/wasm/Reader.cpp -llvm/lib/ObjCopy/wasm/Reader.h -llvm/lib/ObjCopy/wasm/Object.cpp -llvm/lib/ObjCopy/wasm/Writer.cpp -llvm/lib/ObjCopy/wasm/Writer.h -llvm/lib/ObjCopy/wasm/Object.h -llvm/lib/ObjCopy/wasm/WasmObjcopy.cpp -llvm/lib/ObjCopy/ELF/Object.cpp -llvm/lib/ObjCopy/MachO/MachOWriter.cpp -llvm/lib/ObjCopy/MachO/Object.cpp -llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.h -llvm/lib/ObjCopy/MachO/MachOWriter.h -llvm/lib/ObjCopy/MachO/MachOReader.h -llvm/lib/ObjCopy/MachO/MachOReader.cpp -llvm/lib/ObjCopy/MachO/Object.h +llvm/lib/ObjCopy/ConfigManager.cpp +llvm/lib/ObjCopy/COFF/COFFObjcopy.cpp +llvm/lib/ObjCopy/COFF/COFFObject.cpp +llvm/lib/ObjCopy/COFF/COFFObject.h +llvm/lib/ObjCopy/COFF/COFFReader.cpp +llvm/lib/ObjCopy/COFF/COFFReader.h +llvm/lib/ObjCopy/COFF/COFFWriter.cpp +llvm/lib/ObjCopy/COFF/COFFWriter.h +llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp +llvm/lib/ObjCopy/ELF/ELFObject.cpp +llvm/lib/ObjCopy/ELF/ELFObject.h llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.cpp +llvm/lib/ObjCopy/MachO/MachOLayoutBuilder.h llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp -llvm/lib/ObjCopy/COFF/Reader.cpp -llvm/lib/ObjCopy/COFF/Reader.h -llvm/lib/ObjCopy/COFF/Object.cpp -llvm/lib/ObjCopy/COFF/Writer.cpp -llvm/lib/ObjCopy/COFF/COFFObjcopy.cpp -llvm/lib/ObjCopy/COFF/Writer.h -llvm/lib/ObjCopy/COFF/Object.h +llvm/lib/ObjCopy/MachO/MachOObject.cpp +llvm/lib/ObjCopy/MachO/MachOObject.h 
+llvm/lib/ObjCopy/MachO/MachOReader.cpp +llvm/lib/ObjCopy/MachO/MachOReader.h +llvm/lib/ObjCopy/MachO/MachOWriter.cpp +llvm/lib/ObjCopy/MachO/MachOWriter.h +llvm/lib/ObjCopy/wasm/WasmObjcopy.cpp +llvm/lib/ObjCopy/wasm/WasmObject.cpp +llvm/lib/ObjCopy/wasm/WasmObject.h +llvm/lib/ObjCopy/wasm/WasmReader.cpp +llvm/lib/ObjCopy/wasm/WasmReader.h +llvm/lib/ObjCopy/wasm/WasmWriter.cpp +llvm/lib/ObjCopy/wasm/WasmWriter.h +llvm/lib/ObjCopy/XCOFF/XCOFFObject.h +llvm/lib/ObjCopy/XCOFF/XCOFFReader.cpp +llvm/lib/ObjCopy/XCOFF/XCOFFReader.h llvm/lib/Object/Archive.cpp llvm/lib/Object/Binary.cpp llvm/lib/Object/Decompressor.cpp llvm/lib/Object/FaultMapParser.cpp llvm/lib/Object/IRObjectFile.cpp +llvm/lib/Object/IRSymtab.cpp llvm/lib/Object/MachOUniversalWriter.cpp llvm/lib/Object/Minidump.cpp llvm/lib/Object/ModuleSymbolTable.cpp @@ -5835,7 +6198,7 @@ llvm/lib/Passes/PassPlugin.cpp llvm/lib/ProfileData/GCOV.cpp llvm/lib/ProfileData/InstrProfCorrelator.cpp -llvm/lib/ProfileData/RawMemProfReader.cpp +llvm/lib/ProfileData/MemProf.cpp llvm/lib/ProfileData/SampleProfWriter.cpp llvm/lib/Remarks/BitstreamRemarkParser.h llvm/lib/Remarks/BitstreamRemarkSerializer.cpp @@ -5862,6 +6225,9 @@ llvm/lib/Support/COM.cpp llvm/lib/Support/Compression.cpp llvm/lib/Support/CRC.cpp +llvm/lib/Support/CSKYAttributeParser.cpp +llvm/lib/Support/CSKYAttributes.cpp +llvm/lib/Support/CSKYTargetParser.cpp llvm/lib/Support/DebugOptions.h llvm/lib/Support/DivisionByConstantInfo.cpp llvm/lib/Support/DJB.cpp @@ -5905,12 +6271,16 @@ llvm/lib/Support/VersionTuple.cpp llvm/lib/Support/Watchdog.cpp llvm/lib/Support/WithColor.cpp +llvm/lib/TableGen/Parser.cpp +llvm/lib/TableGen/RecordContext.h llvm/lib/TableGen/TableGenBackendSkeleton.cpp +llvm/lib/Target/TargetIntrinsicInfo.cpp llvm/lib/Target/AArch64/AArch64CompressJumpTables.cpp llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp -llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.cpp +llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp 
llvm/lib/Target/AArch64/AArch64PBQPRegAlloc.h llvm/lib/Target/AArch64/AArch64SpeculationHardening.cpp +llvm/lib/Target/AArch64/AArch64StackTagging.cpp llvm/lib/Target/AArch64/AArch64TargetObjectFile.h llvm/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.h llvm/lib/Target/AArch64/GISel/AArch64GlobalISelUtils.cpp @@ -5941,7 +6311,6 @@ llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.h llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.cpp llvm/lib/Target/AMDGPU/AMDGPUMIRFormatter.h -llvm/lib/Target/AMDGPU/AMDGPUPromoteKernelArguments.cpp llvm/lib/Target/AMDGPU/AMDGPUPTNote.h llvm/lib/Target/AMDGPU/AMDGPUReplaceLDSUseWithPointer.cpp llvm/lib/Target/AMDGPU/AMDGPUResourceUsageAnalysis.h @@ -5969,10 +6338,9 @@ llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.cpp llvm/lib/Target/AMDGPU/MCTargetDesc/R600InstPrinter.h llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.cpp -llvm/lib/Target/AMDGPU/MCTargetDesc/R600MCTargetDesc.h llvm/lib/Target/AMDGPU/TargetInfo/AMDGPUTargetInfo.cpp -llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.cpp -llvm/lib/Target/AMDGPU/Utils/AMDGPULDSUtils.h +llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.cpp +llvm/lib/Target/AMDGPU/Utils/AMDGPUMemoryUtils.h llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.h llvm/lib/Target/ARC/ARC.h @@ -6019,7 +6387,6 @@ llvm/lib/Target/AVR/AVR.h llvm/lib/Target/AVR/AVRAsmPrinter.cpp llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp -llvm/lib/Target/AVR/AVRFrameLowering.cpp llvm/lib/Target/AVR/AVRFrameLowering.h llvm/lib/Target/AVR/AVRInstrInfo.cpp llvm/lib/Target/AVR/AVRInstrInfo.h @@ -6052,14 +6419,12 @@ llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.h llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.cpp llvm/lib/Target/AVR/MCTargetDesc/AVRMCAsmInfo.h -llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.cpp llvm/lib/Target/AVR/MCTargetDesc/AVRMCCodeEmitter.h llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.cpp llvm/lib/Target/AVR/MCTargetDesc/AVRMCELFStreamer.h 
llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.cpp llvm/lib/Target/AVR/MCTargetDesc/AVRMCExpr.h llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.cpp -llvm/lib/Target/AVR/MCTargetDesc/AVRMCTargetDesc.h llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.cpp llvm/lib/Target/AVR/MCTargetDesc/AVRTargetStreamer.h llvm/lib/Target/AVR/TargetInfo/AVRTargetInfo.cpp @@ -6079,6 +6444,9 @@ llvm/lib/Target/CSKY/CSKYAsmPrinter.cpp llvm/lib/Target/CSKY/CSKYAsmPrinter.h llvm/lib/Target/CSKY/CSKYCallingConv.h +llvm/lib/Target/CSKY/CSKYConstantIslandPass.cpp +llvm/lib/Target/CSKY/CSKYConstantPoolValue.cpp +llvm/lib/Target/CSKY/CSKYConstantPoolValue.h llvm/lib/Target/CSKY/CSKYFrameLowering.cpp llvm/lib/Target/CSKY/CSKYFrameLowering.h llvm/lib/Target/CSKY/CSKYInstrInfo.cpp @@ -6096,6 +6464,7 @@ llvm/lib/Target/CSKY/CSKYTargetMachine.cpp llvm/lib/Target/CSKY/CSKYTargetMachine.h llvm/lib/Target/CSKY/AsmParser/CSKYAsmParser.cpp +llvm/lib/Target/CSKY/Disassembler/CSKYDisassembler.cpp llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.cpp llvm/lib/Target/CSKY/MCTargetDesc/CSKYAsmBackend.h llvm/lib/Target/CSKY/MCTargetDesc/CSKYBaseInfo.h @@ -6110,7 +6479,6 @@ llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.cpp llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCExpr.h llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.cpp -llvm/lib/Target/CSKY/MCTargetDesc/CSKYMCTargetDesc.h llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.cpp llvm/lib/Target/CSKY/TargetInfo/CSKYTargetInfo.h llvm/lib/Target/Hexagon/HexagonLoopIdiomRecognition.h @@ -6162,6 +6530,39 @@ llvm/lib/Target/Lanai/MCTargetDesc/LanaiMCTargetDesc.h llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.cpp llvm/lib/Target/Lanai/TargetInfo/LanaiTargetInfo.h +llvm/lib/Target/LoongArch/LoongArch.h +llvm/lib/Target/LoongArch/LoongArchAsmPrinter.cpp +llvm/lib/Target/LoongArch/LoongArchAsmPrinter.h +llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp +llvm/lib/Target/LoongArch/LoongArchFrameLowering.h +llvm/lib/Target/LoongArch/LoongArchInstrInfo.cpp 
+llvm/lib/Target/LoongArch/LoongArchInstrInfo.h +llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp +llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h +llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp +llvm/lib/Target/LoongArch/LoongArchISelLowering.h +llvm/lib/Target/LoongArch/LoongArchMachineFunctionInfo.h +llvm/lib/Target/LoongArch/LoongArchMCInstLower.cpp +llvm/lib/Target/LoongArch/LoongArchRegisterInfo.cpp +llvm/lib/Target/LoongArch/LoongArchRegisterInfo.h +llvm/lib/Target/LoongArch/LoongArchSubtarget.cpp +llvm/lib/Target/LoongArch/LoongArchSubtarget.h +llvm/lib/Target/LoongArch/LoongArchTargetMachine.cpp +llvm/lib/Target/LoongArch/LoongArchTargetMachine.h +llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.cpp +llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchAsmBackend.h +llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.cpp +llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchBaseInfo.h +llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchELFObjectWriter.cpp +llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.cpp +llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchInstPrinter.h +llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.cpp +llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCAsmInfo.h +llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.cpp +llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCTargetDesc.h +llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.cpp +llvm/lib/Target/LoongArch/TargetInfo/LoongArchTargetInfo.h llvm/lib/Target/M68k/M68k.h llvm/lib/Target/M68k/M68kAsmPrinter.cpp llvm/lib/Target/M68k/M68kAsmPrinter.h @@ -6206,7 +6607,6 @@ llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.cpp llvm/lib/Target/M68k/MCTargetDesc/M68kMCCodeEmitter.h llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.cpp -llvm/lib/Target/M68k/MCTargetDesc/M68kMCTargetDesc.h llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.cpp 
llvm/lib/Target/M68k/TargetInfo/M68kTargetInfo.h llvm/lib/Target/Mips/Mips16RegisterInfo.h @@ -6253,6 +6653,7 @@ llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp llvm/lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.h llvm/lib/Target/PowerPC/PPCExpandAtomicPseudoInsts.cpp +llvm/lib/Target/PowerPC/PPCGenScalarMASSEntries.cpp llvm/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp llvm/lib/Target/PowerPC/PPCTargetMachine.h llvm/lib/Target/PowerPC/PPCTargetStreamer.h @@ -6272,17 +6673,16 @@ llvm/lib/Target/PowerPC/TargetInfo/PowerPCTargetInfo.h llvm/lib/Target/RISCV/RISCVCallLowering.cpp llvm/lib/Target/RISCV/RISCVCallLowering.h -llvm/lib/Target/RISCV/RISCVExpandAtomicPseudoInsts.cpp llvm/lib/Target/RISCV/RISCVExpandPseudoInsts.cpp -llvm/lib/Target/RISCV/RISCVGatherScatterLowering.cpp -llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp llvm/lib/Target/RISCV/RISCVLegalizerInfo.cpp llvm/lib/Target/RISCV/RISCVLegalizerInfo.h llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h llvm/lib/Target/RISCV/RISCVMergeBaseOffset.cpp +llvm/lib/Target/RISCV/RISCVRedundantCopyElimination.cpp llvm/lib/Target/RISCV/RISCVRegisterBankInfo.cpp llvm/lib/Target/RISCV/RISCVRegisterBankInfo.h +llvm/lib/Target/RISCV/RISCVSExtWRemoval.cpp llvm/lib/Target/RISCV/RISCVTargetMachine.h llvm/lib/Target/RISCV/RISCVTargetObjectFile.cpp llvm/lib/Target/RISCV/RISCVTargetObjectFile.h @@ -6326,11 +6726,12 @@ llvm/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.h llvm/lib/Target/VE/LVLGen.cpp llvm/lib/Target/VE/VEAsmPrinter.cpp +llvm/lib/Target/VE/VECustomDAG.cpp +llvm/lib/Target/VE/VECustomDAG.h llvm/lib/Target/VE/VEFrameLowering.h llvm/lib/Target/VE/VEInstrBuilder.h llvm/lib/Target/VE/VEInstrInfo.h llvm/lib/Target/VE/VEISelDAGToDAG.cpp -llvm/lib/Target/VE/VEISelLowering.cpp llvm/lib/Target/VE/VEMachineFunctionInfo.cpp llvm/lib/Target/VE/VEMachineFunctionInfo.h llvm/lib/Target/VE/VEMCInstLower.cpp @@ -6341,6 +6742,7 @@ llvm/lib/Target/VE/VETargetMachine.cpp 
llvm/lib/Target/VE/VETargetMachine.h llvm/lib/Target/VE/VETargetTransformInfo.h +llvm/lib/Target/VE/VVPISelLowering.cpp llvm/lib/Target/VE/AsmParser/VEAsmParser.cpp llvm/lib/Target/VE/Disassembler/VEDisassembler.cpp llvm/lib/Target/VE/MCTargetDesc/VEAsmBackend.cpp @@ -6422,6 +6824,7 @@ llvm/lib/Target/X86/X86FastTileConfig.cpp llvm/lib/Target/X86/X86InsertPrefetch.cpp llvm/lib/Target/X86/X86InsertWait.cpp +llvm/lib/Target/X86/X86InstCombineIntrinsic.cpp llvm/lib/Target/X86/X86InterleavedAccess.cpp llvm/lib/Target/X86/X86LegalizerInfo.h llvm/lib/Target/X86/X86LoadValueInjectionRetHardening.cpp @@ -6435,6 +6838,8 @@ llvm/lib/Target/X86/X86SpeculativeExecutionSideEffectSuppression.cpp llvm/lib/Target/X86/X86TargetMachine.h llvm/lib/Target/X86/X86TileConfig.cpp +llvm/lib/Target/X86/MCA/X86CustomBehaviour.cpp +llvm/lib/Target/X86/MCA/X86CustomBehaviour.h llvm/lib/Target/X86/MCTargetDesc/X86ATTInstPrinter.h llvm/lib/Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp llvm/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h @@ -6450,7 +6855,6 @@ llvm/lib/Testing/Support/Annotations.cpp llvm/lib/Testing/Support/Error.cpp llvm/lib/Testing/Support/SupportHelpers.cpp -llvm/lib/TextAPI/Architecture.cpp llvm/lib/TextAPI/ArchitectureSet.cpp llvm/lib/TextAPI/InterfaceFile.cpp llvm/lib/TextAPI/PackedVersion.cpp @@ -6460,12 +6864,10 @@ llvm/lib/TextAPI/TextAPIContext.h llvm/lib/TextAPI/TextStub.cpp llvm/lib/TextAPI/TextStubCommon.cpp -llvm/lib/TextAPI/TextStubCommon.h llvm/lib/Transforms/AggressiveInstCombine/AggressiveInstCombineInternal.h llvm/lib/Transforms/CFGuard/CFGuard.cpp llvm/lib/Transforms/InstCombine/InstCombineNegator.cpp llvm/lib/Transforms/Instrumentation/CFGMST.h -llvm/lib/Transforms/Instrumentation/CGProfile.cpp llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -6474,7 +6876,6 @@ llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h 
llvm/lib/Transforms/IPO/Annotation2Metadata.cpp llvm/lib/Transforms/IPO/Attributor.cpp -llvm/lib/Transforms/IPO/AttributorAttributes.cpp llvm/lib/Transforms/IPO/ElimAvailExtern.cpp llvm/lib/Transforms/IPO/ModuleInliner.cpp llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -6515,6 +6916,7 @@ llvm/lib/Transforms/Utils/InjectTLIMappings.cpp llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp llvm/lib/Transforms/Utils/MatrixUtils.cpp +llvm/lib/Transforms/Utils/MemoryTaggingSupport.cpp llvm/lib/Transforms/Utils/SampleProfileInference.cpp llvm/lib/Transforms/Utils/SampleProfileLoaderBaseUtil.cpp llvm/lib/Transforms/Utils/SCCPSolver.cpp @@ -6583,9 +6985,9 @@ llvm/tools/llvm-cov/SourceCoverageViewText.h llvm/tools/llvm-cov/TestingSupport.cpp llvm/tools/llvm-cxxdump/Error.cpp -llvm/tools/llvm-cxxdump/llvm-cxxdump.h llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp llvm/tools/llvm-debuginfod-find/llvm-debuginfod-find.cpp +llvm/tools/llvm-dis-fuzzer/llvm-dis-fuzzer.cpp llvm/tools/llvm-dlang-demangle-fuzzer/DummyDemanglerFuzzer.cpp llvm/tools/llvm-dlang-demangle-fuzzer/llvm-dlang-demangle-fuzzer.cpp llvm/tools/llvm-dwarfdump/llvm-dwarfdump.cpp @@ -6670,7 +7072,6 @@ llvm/tools/llvm-ml/Disassembler.h llvm/tools/llvm-modextract/llvm-modextract.cpp llvm/tools/llvm-objcopy/llvm-objcopy.cpp -llvm/tools/llvm-objcopy/ObjcopyOptions.cpp llvm/tools/llvm-objcopy/ObjcopyOptions.h llvm/tools/llvm-objdump/COFFDump.h llvm/tools/llvm-objdump/ELFDump.h @@ -6703,7 +7104,6 @@ llvm/tools/llvm-profgen/llvm-profgen.cpp llvm/tools/llvm-profgen/PerfReader.cpp llvm/tools/llvm-profgen/PerfReader.h -llvm/tools/llvm-profgen/ProfileGenerator.h llvm/tools/llvm-rc/ResourceScriptCppFilter.cpp llvm/tools/llvm-rc/ResourceScriptCppFilter.h llvm/tools/llvm-rc/ResourceScriptParser.h @@ -6715,7 +7115,6 @@ llvm/tools/llvm-readobj/WindowsResourceDumper.h llvm/tools/llvm-reduce/DeltaManager.cpp llvm/tools/llvm-reduce/DeltaManager.h -llvm/tools/llvm-reduce/llvm-reduce.cpp llvm/tools/llvm-reduce/ReducerWorkItem.cpp 
llvm/tools/llvm-reduce/ReducerWorkItem.h llvm/tools/llvm-reduce/TestRunner.cpp @@ -6759,12 +7158,12 @@ llvm/tools/llvm-reduce/deltas/ReduceOperandsToArgs.h llvm/tools/llvm-reduce/deltas/ReduceSpecialGlobals.cpp llvm/tools/llvm-reduce/deltas/ReduceSpecialGlobals.h +llvm/tools/llvm-remark-size-diff/RemarkSizeDiff.cpp llvm/tools/llvm-rust-demangle-fuzzer/DummyDemanglerFuzzer.cpp llvm/tools/llvm-rust-demangle-fuzzer/llvm-rust-demangle-fuzzer.cpp llvm/tools/llvm-shlib/libllvm.cpp llvm/tools/llvm-special-case-list-fuzzer/DummySpecialCaseListFuzzer.cpp llvm/tools/llvm-special-case-list-fuzzer/special-case-list-fuzzer.cpp -llvm/tools/llvm-split/llvm-split.cpp llvm/tools/llvm-strings/llvm-strings.cpp llvm/tools/llvm-tapi-diff/DiffEngine.cpp llvm/tools/llvm-tapi-diff/DiffEngine.h @@ -6810,7 +7209,6 @@ llvm/unittests/ADT/DirectedGraphTest.cpp llvm/unittests/ADT/EnumeratedArrayTest.cpp llvm/unittests/ADT/FallibleIteratorTest.cpp -llvm/unittests/ADT/FunctionExtrasTest.cpp llvm/unittests/ADT/FunctionRefTest.cpp llvm/unittests/ADT/IListBaseTest.cpp llvm/unittests/ADT/IListNodeBaseTest.cpp @@ -6821,7 +7219,6 @@ llvm/unittests/ADT/ScopeExitTest.cpp llvm/unittests/ADT/SequenceTest.cpp llvm/unittests/ADT/SetVectorTest.cpp -llvm/unittests/ADT/SimpleIListTest.cpp llvm/unittests/ADT/SmallSetTest.cpp llvm/unittests/ADT/SparseMultiSetTest.cpp llvm/unittests/ADT/SparseSetTest.cpp @@ -6832,7 +7229,6 @@ llvm/unittests/ADT/StringSwitchTest.cpp llvm/unittests/ADT/TypeSwitchTest.cpp llvm/unittests/ADT/TypeTraitsTest.cpp -llvm/unittests/ADT/WaymarkingTest.cpp llvm/unittests/Analysis/BasicAliasAnalysisTest.cpp llvm/unittests/Analysis/BlockFrequencyInfoTest.cpp llvm/unittests/Analysis/BranchProbabilityInfoTest.cpp @@ -6855,6 +7251,7 @@ llvm/unittests/Bitcode/DataLayoutUpgradeTest.cpp llvm/unittests/Bitstream/BitstreamWriterTest.cpp llvm/unittests/CodeGen/AllocationOrderTest.cpp +llvm/unittests/CodeGen/AMDGPUMetadataTest.cpp llvm/unittests/CodeGen/AsmPrinterDwarfTest.cpp 
llvm/unittests/CodeGen/DIETest.cpp llvm/unittests/CodeGen/LexicalScopesTest.cpp @@ -6870,6 +7267,7 @@ llvm/unittests/DebugInfo/DWARF/DWARFAcceleratorTableTest.cpp llvm/unittests/DebugInfo/DWARF/DWARFDataExtractorTest.cpp llvm/unittests/DebugInfo/DWARF/DWARFDebugArangeSetTest.cpp +llvm/unittests/DebugInfo/DWARF/DWARFDebugFrameTest.cpp llvm/unittests/DebugInfo/DWARF/DWARFDieManualExtractTest.cpp llvm/unittests/DebugInfo/DWARF/DWARFDieTest.cpp llvm/unittests/DebugInfo/DWARF/DWARFExpressionCompactPrinterTest.cpp @@ -6909,12 +7307,12 @@ llvm/unittests/ExecutionEngine/Orc/ThreadSafeModuleTest.cpp llvm/unittests/Frontend/OpenACCTest.cpp llvm/unittests/Frontend/OpenMPContextTest.cpp -llvm/unittests/Frontend/OpenMPIRBuilderTest.cpp llvm/unittests/Frontend/OpenMPParsingTest.cpp llvm/unittests/InterfaceStub/ELFYAMLTest.cpp llvm/unittests/IR/DemandedBitsTest.cpp llvm/unittests/IR/ManglerTest.cpp llvm/unittests/IR/ModuleTest.cpp +llvm/unittests/IR/TimePassesTest.cpp llvm/unittests/IR/UseTest.cpp llvm/unittests/IR/VectorTypesTest.cpp llvm/unittests/MC/Disassembler.cpp @@ -6940,6 +7338,7 @@ llvm/unittests/Passes/TestPlugin.cpp llvm/unittests/Passes/TestPlugin.h llvm/unittests/ProfileData/InstrProfDataTest.cpp +llvm/unittests/ProfileData/MemProfTest.cpp llvm/unittests/Remarks/BitstreamRemarksFormatTest.cpp llvm/unittests/Remarks/BitstreamRemarksParsingTest.cpp llvm/unittests/Remarks/RemarksLinkingTest.cpp @@ -6950,6 +7349,8 @@ llvm/unittests/Support/Base64Test.cpp llvm/unittests/Support/buffer_ostream_test.cpp llvm/unittests/Support/Chrono.cpp +llvm/unittests/Support/CSKYAttributeParserTest.cpp +llvm/unittests/Support/CSKYTargetParserTest.cpp llvm/unittests/Support/DebugCounterTest.cpp llvm/unittests/Support/DJBTest.cpp llvm/unittests/Support/ELFAttributeParserTest.cpp @@ -6979,10 +7380,13 @@ llvm/unittests/Support/WithColorTest.cpp llvm/unittests/Support/xxhashTest.cpp llvm/unittests/Support/CommandLineInit/CommandLineInitTest.cpp +llvm/unittests/TableGen/ParserEntryPointTest.cpp 
llvm/unittests/Target/AArch64/MatrixRegisterAliasing.cpp llvm/unittests/Target/AMDGPU/DwarfRegMappings.cpp llvm/unittests/Target/AMDGPU/ExecMayBeModifiedBeforeAnyUse.cpp +llvm/unittests/Target/ARM/InstSizes.cpp llvm/unittests/Target/PowerPC/AIXRelocModelTest.cpp +llvm/unittests/Testing/Support/TempPathTest.cpp llvm/unittests/TextAPI/TextStubHelpers.h llvm/unittests/TextAPI/TextStubV1Tests.cpp llvm/unittests/TextAPI/TextStubV2Tests.cpp @@ -7005,13 +7409,13 @@ llvm/unittests/tools/llvm-exegesis/X86/TestBase.h llvm/unittests/Transforms/IPO/AttributorTest.cpp llvm/unittests/Transforms/IPO/AttributorTestBase.h +llvm/unittests/Transforms/Utils/BasicBlockUtilsTest.cpp llvm/unittests/Transforms/Utils/CallPromotionUtilsTest.cpp llvm/unittests/Transforms/Utils/CodeMoverUtilsTest.cpp llvm/unittests/Transforms/Utils/LoopUtilsTest.cpp llvm/unittests/Transforms/Utils/ModuleUtilsTest.cpp llvm/unittests/Transforms/Utils/ScalarEvolutionExpanderTest.cpp llvm/unittests/Transforms/Utils/SSAUpdaterBulkTest.cpp -llvm/unittests/Transforms/Utils/ValueMapperTest.cpp llvm/unittests/Transforms/Utils/VFABIUtils.cpp llvm/unittests/Transforms/Vectorize/VPlanDominatorTreeTest.cpp llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp @@ -7038,6 +7442,8 @@ llvm/utils/TableGen/OptRSTEmitter.cpp llvm/utils/TableGen/PredicateExpander.h llvm/utils/TableGen/SDNodeProperties.cpp +llvm/utils/TableGen/VarLenCodeEmitterGen.cpp +llvm/utils/TableGen/VarLenCodeEmitterGen.h llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.h llvm/utils/TableGen/GlobalISel/CodeExpander.cpp llvm/utils/TableGen/GlobalISel/CodeExpander.h @@ -7045,6 +7451,8 @@ llvm/utils/TableGen/GlobalISel/GIMatchDagEdge.cpp llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.cpp llvm/utils/TableGen/GlobalISel/GIMatchDagInstr.h +llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.cpp +llvm/utils/TableGen/GlobalISel/GIMatchDagPredicate.h llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.cpp 
llvm/utils/TableGen/GlobalISel/GIMatchDagPredicateDependencyEdge.h mlir/examples/standalone/include/Standalone/StandaloneDialect.h @@ -7135,23 +7543,19 @@ mlir/include/mlir/InitAllTranslations.h mlir/include/mlir/Parser.h mlir/include/mlir/Translation.h -mlir/include/mlir/Analysis/AffineAnalysis.h -mlir/include/mlir/Analysis/AffineStructures.h mlir/include/mlir/Analysis/BufferViewFlowAnalysis.h mlir/include/mlir/Analysis/DataFlowAnalysis.h mlir/include/mlir/Analysis/DataLayoutAnalysis.h mlir/include/mlir/Analysis/Liveness.h -mlir/include/mlir/Analysis/LoopAnalysis.h -mlir/include/mlir/Analysis/NestedMatcher.h -mlir/include/mlir/Analysis/NumberOfExecutions.h -mlir/include/mlir/Analysis/PresburgerSet.h mlir/include/mlir/Analysis/SliceAnalysis.h -mlir/include/mlir/Analysis/Utils.h mlir/include/mlir/Analysis/AliasAnalysis/LocalAliasAnalysis.h mlir/include/mlir/Analysis/Presburger/Fraction.h -mlir/include/mlir/Analysis/Presburger/IntegerPolyhedron.h +mlir/include/mlir/Analysis/Presburger/IntegerRelation.h mlir/include/mlir/Analysis/Presburger/LinearTransform.h mlir/include/mlir/Analysis/Presburger/Matrix.h +mlir/include/mlir/Analysis/Presburger/PresburgerSet.h +mlir/include/mlir/Analysis/Presburger/PresburgerSpace.h +mlir/include/mlir/Analysis/Presburger/PWMAFunction.h mlir/include/mlir/Analysis/Presburger/Simplex.h mlir/include/mlir/Analysis/Presburger/Utils.h mlir/include/mlir/CAPI/AffineExpr.h @@ -7175,7 +7579,13 @@ mlir/include/mlir/Conversion/BufferizationToMemRef/BufferizationToMemRef.h mlir/include/mlir/Conversion/ComplexToLLVM/ComplexToLLVM.h mlir/include/mlir/Conversion/ComplexToStandard/ComplexToStandard.h +mlir/include/mlir/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.h +mlir/include/mlir/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRV.h +mlir/include/mlir/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRVPass.h +mlir/include/mlir/Conversion/FuncToSPIRV/FuncToSPIRV.h +mlir/include/mlir/Conversion/FuncToSPIRV/FuncToSPIRVPass.h 
mlir/include/mlir/Conversion/GPUCommon/GPUCommonPass.h +mlir/include/mlir/Conversion/GPUToNVVM/GPUToNVVMPass.h mlir/include/mlir/Conversion/GPUToROCDL/GPUToROCDLPass.h mlir/include/mlir/Conversion/GPUToROCDL/Runtimes.h mlir/include/mlir/Conversion/GPUToSPIRV/GPUToSPIRV.h @@ -7204,19 +7614,19 @@ mlir/include/mlir/Conversion/OpenMPToLLVM/ConvertOpenMPToLLVM.h mlir/include/mlir/Conversion/PDLToPDLInterp/PDLToPDLInterp.h mlir/include/mlir/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.h +mlir/include/mlir/Conversion/SCFToControlFlow/SCFToControlFlow.h mlir/include/mlir/Conversion/SCFToGPU/SCFToGPU.h mlir/include/mlir/Conversion/SCFToGPU/SCFToGPUPass.h mlir/include/mlir/Conversion/SCFToOpenMP/SCFToOpenMP.h mlir/include/mlir/Conversion/SCFToSPIRV/SCFToSPIRV.h mlir/include/mlir/Conversion/SCFToSPIRV/SCFToSPIRVPass.h -mlir/include/mlir/Conversion/SCFToStandard/SCFToStandard.h mlir/include/mlir/Conversion/ShapeToStandard/ShapeToStandard.h mlir/include/mlir/Conversion/SPIRVToLLVM/SPIRVToLLVM.h mlir/include/mlir/Conversion/SPIRVToLLVM/SPIRVToLLVMPass.h mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h mlir/include/mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h -mlir/include/mlir/Conversion/StandardToSPIRV/StandardToSPIRV.h -mlir/include/mlir/Conversion/StandardToSPIRV/StandardToSPIRVPass.h +mlir/include/mlir/Conversion/TensorToSPIRV/TensorToSPIRV.h +mlir/include/mlir/Conversion/TensorToSPIRV/TensorToSPIRVPass.h mlir/include/mlir/Conversion/TosaToLinalg/TosaToLinalg.h mlir/include/mlir/Conversion/TosaToSCF/TosaToSCF.h mlir/include/mlir/Conversion/TosaToStandard/TosaToStandard.h @@ -7228,15 +7638,24 @@ mlir/include/mlir/Conversion/VectorToSPIRV/VectorToSPIRVPass.h mlir/include/mlir/Dialect/CommonFolders.h mlir/include/mlir/Dialect/Traits.h +mlir/include/mlir/Dialect/Affine/LoopFusionUtils.h +mlir/include/mlir/Dialect/Affine/LoopUtils.h mlir/include/mlir/Dialect/Affine/Passes.h mlir/include/mlir/Dialect/Affine/Utils.h 
+mlir/include/mlir/Dialect/Affine/Analysis/AffineAnalysis.h +mlir/include/mlir/Dialect/Affine/Analysis/AffineStructures.h +mlir/include/mlir/Dialect/Affine/Analysis/LoopAnalysis.h +mlir/include/mlir/Dialect/Affine/Analysis/NestedMatcher.h +mlir/include/mlir/Dialect/Affine/Analysis/Utils.h mlir/include/mlir/Dialect/Affine/IR/AffineMemoryOpInterfaces.h mlir/include/mlir/Dialect/Affine/IR/AffineOps.h mlir/include/mlir/Dialect/Affine/IR/AffineValueMap.h mlir/include/mlir/Dialect/AMX/AMXDialect.h mlir/include/mlir/Dialect/AMX/Transforms.h mlir/include/mlir/Dialect/Arithmetic/IR/Arithmetic.h +mlir/include/mlir/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.h mlir/include/mlir/Dialect/Arithmetic/Transforms/Passes.h +mlir/include/mlir/Dialect/Arithmetic/Utils/Utils.h mlir/include/mlir/Dialect/ArmNeon/ArmNeonDialect.h mlir/include/mlir/Dialect/ArmSVE/ArmSVEDialect.h mlir/include/mlir/Dialect/ArmSVE/Transforms.h @@ -7245,13 +7664,22 @@ mlir/include/mlir/Dialect/Async/IR/Async.h mlir/include/mlir/Dialect/Async/IR/AsyncTypes.h mlir/include/mlir/Dialect/Bufferization/IR/AllocationOpInterface.h +mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h mlir/include/mlir/Dialect/Bufferization/IR/Bufferization.h mlir/include/mlir/Dialect/Bufferization/Transforms/Bufferize.h +mlir/include/mlir/Dialect/Bufferization/Transforms/BufferUtils.h +mlir/include/mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h mlir/include/mlir/Dialect/Bufferization/Transforms/Passes.h mlir/include/mlir/Dialect/Complex/IR/Complex.h +mlir/include/mlir/Dialect/ControlFlow/IR/ControlFlow.h +mlir/include/mlir/Dialect/ControlFlow/IR/ControlFlowOps.h mlir/include/mlir/Dialect/DLTI/DLTI.h mlir/include/mlir/Dialect/DLTI/Traits.h mlir/include/mlir/Dialect/EmitC/IR/EmitC.h +mlir/include/mlir/Dialect/Func/IR/FuncOps.h +mlir/include/mlir/Dialect/Func/Transforms/DecomposeCallGraphTypes.h +mlir/include/mlir/Dialect/Func/Transforms/FuncConversions.h 
+mlir/include/mlir/Dialect/Func/Transforms/Passes.h mlir/include/mlir/Dialect/GPU/GPUDialect.h mlir/include/mlir/Dialect/GPU/MemoryPromotion.h mlir/include/mlir/Dialect/GPU/ParallelLoopMapper.h @@ -7260,16 +7688,10 @@ mlir/include/mlir/Dialect/Linalg/Passes.h mlir/include/mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/AffineInterfaceImpl.h -mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.h -mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/BufferizationInterfaceImpl.h -mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.h -mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.h mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.h -mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.h -mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.h -mlir/include/mlir/Dialect/Linalg/ComprehensiveBufferize/VectorInterfaceImpl.h mlir/include/mlir/Dialect/Linalg/IR/Linalg.h mlir/include/mlir/Dialect/Linalg/IR/LinalgInterfaces.h +mlir/include/mlir/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.h mlir/include/mlir/Dialect/Linalg/Transforms/CodegenStrategy.h mlir/include/mlir/Dialect/Linalg/Transforms/Hoisting.h mlir/include/mlir/Dialect/Linalg/Transforms/HoistPadding.h @@ -7286,6 +7708,7 @@ mlir/include/mlir/Dialect/Math/Transforms/Approximation.h mlir/include/mlir/Dialect/Math/Transforms/Passes.h mlir/include/mlir/Dialect/MemRef/IR/MemRef.h +mlir/include/mlir/Dialect/MemRef/Transforms/ComposeSubView.h mlir/include/mlir/Dialect/MemRef/Transforms/Passes.h mlir/include/mlir/Dialect/MemRef/Utils/MemRefUtils.h mlir/include/mlir/Dialect/OpenACC/OpenACC.h @@ -7300,13 +7723,16 @@ mlir/include/mlir/Dialect/Quant/QuantOps.h mlir/include/mlir/Dialect/Quant/QuantTypes.h mlir/include/mlir/Dialect/Quant/UniformSupport.h -mlir/include/mlir/Dialect/SCF/AffineCanonicalizationUtils.h 
+mlir/include/mlir/Dialect/SCF/BufferizableOpInterfaceImpl.h mlir/include/mlir/Dialect/SCF/Passes.h mlir/include/mlir/Dialect/SCF/SCF.h mlir/include/mlir/Dialect/SCF/Transforms.h +mlir/include/mlir/Dialect/SCF/Utils/AffineCanonicalizationUtils.h +mlir/include/mlir/Dialect/SCF/Utils/Utils.h mlir/include/mlir/Dialect/Shape/IR/Shape.h mlir/include/mlir/Dialect/Shape/Transforms/Passes.h mlir/include/mlir/Dialect/SparseTensor/IR/SparseTensor.h +mlir/include/mlir/Dialect/SparseTensor/Pipelines/Passes.h mlir/include/mlir/Dialect/SparseTensor/Transforms/Passes.h mlir/include/mlir/Dialect/SparseTensor/Utils/Merger.h mlir/include/mlir/Dialect/SPIRV/IR/ParserUtils.h @@ -7322,27 +7748,29 @@ mlir/include/mlir/Dialect/SPIRV/Transforms/Passes.h mlir/include/mlir/Dialect/SPIRV/Transforms/SPIRVConversion.h mlir/include/mlir/Dialect/SPIRV/Utils/LayoutUtils.h -mlir/include/mlir/Dialect/StandardOps/IR/Ops.h -mlir/include/mlir/Dialect/StandardOps/Transforms/ComposeSubView.h -mlir/include/mlir/Dialect/StandardOps/Transforms/DecomposeCallGraphTypes.h -mlir/include/mlir/Dialect/StandardOps/Transforms/FuncConversions.h -mlir/include/mlir/Dialect/StandardOps/Transforms/Passes.h -mlir/include/mlir/Dialect/StandardOps/Utils/Utils.h mlir/include/mlir/Dialect/Tensor/IR/Tensor.h mlir/include/mlir/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.h +mlir/include/mlir/Dialect/Tensor/IR/TensorTilingInterfaceImpl.h +mlir/include/mlir/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.h mlir/include/mlir/Dialect/Tensor/Transforms/Passes.h +mlir/include/mlir/Dialect/Tensor/Transforms/Transforms.h +mlir/include/mlir/Dialect/Tensor/Utils/Utils.h mlir/include/mlir/Dialect/Tosa/IR/TosaOps.h mlir/include/mlir/Dialect/Tosa/Transforms/PassDetail.h mlir/include/mlir/Dialect/Tosa/Transforms/Passes.h +mlir/include/mlir/Dialect/Tosa/Utils/CoversionUtils.h mlir/include/mlir/Dialect/Tosa/Utils/QuantUtils.h mlir/include/mlir/Dialect/Tosa/Utils/ShapeUtils.h +mlir/include/mlir/Dialect/Utils/IndexingUtils.h 
mlir/include/mlir/Dialect/Utils/ReshapeOpsUtils.h mlir/include/mlir/Dialect/Utils/StaticValueUtils.h mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h -mlir/include/mlir/Dialect/Vector/VectorOps.h -mlir/include/mlir/Dialect/Vector/VectorRewritePatterns.h -mlir/include/mlir/Dialect/Vector/VectorTransforms.h -mlir/include/mlir/Dialect/Vector/VectorUtils.h +mlir/include/mlir/Dialect/Vector/IR/VectorOps.h +mlir/include/mlir/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.h +mlir/include/mlir/Dialect/Vector/Transforms/Passes.h +mlir/include/mlir/Dialect/Vector/Transforms/VectorRewritePatterns.h +mlir/include/mlir/Dialect/Vector/Transforms/VectorTransforms.h +mlir/include/mlir/Dialect/Vector/Utils/VectorUtils.h mlir/include/mlir/Dialect/X86Vector/Transforms.h mlir/include/mlir/Dialect/X86Vector/X86VectorDialect.h mlir/include/mlir/ExecutionEngine/AsyncRuntime.h @@ -7350,6 +7778,7 @@ mlir/include/mlir/ExecutionEngine/JitRunner.h mlir/include/mlir/ExecutionEngine/MemRefUtils.h mlir/include/mlir/ExecutionEngine/OptUtils.h +mlir/include/mlir/ExecutionEngine/RunnerUtils.h mlir/include/mlir/ExecutionEngine/SparseTensorUtils.h mlir/include/mlir/Interfaces/CallInterfaces.h mlir/include/mlir/Interfaces/CastInterfaces.h @@ -7376,8 +7805,7 @@ mlir/include/mlir/IR/DialectImplementation.h mlir/include/mlir/IR/Dominance.h mlir/include/mlir/IR/FunctionImplementation.h -mlir/include/mlir/IR/FunctionSupport.h -mlir/include/mlir/IR/Identifier.h +mlir/include/mlir/IR/FunctionInterfaces.h mlir/include/mlir/IR/ImplicitLocOpBuilder.h mlir/include/mlir/IR/Matchers.h mlir/include/mlir/IR/MLIRContext.h @@ -7393,6 +7821,7 @@ mlir/include/mlir/IR/TypeUtilities.h mlir/include/mlir/IR/Value.h mlir/include/mlir/IR/Verifier.h +mlir/include/mlir/IR/Visitors.h mlir/include/mlir/Parser/AsmParserState.h mlir/include/mlir/Reducer/PassDetail.h mlir/include/mlir/Reducer/Passes.h @@ -7453,17 +7882,20 @@ mlir/include/mlir/Tools/mlir-reduce/MlirReduceMain.h mlir/include/mlir/Tools/PDLL/AST/Context.h 
mlir/include/mlir/Tools/PDLL/AST/Diagnostic.h +mlir/include/mlir/Tools/PDLL/CodeGen/CPPGen.h +mlir/include/mlir/Tools/PDLL/CodeGen/MLIRGen.h +mlir/include/mlir/Tools/PDLL/ODS/Constraint.h +mlir/include/mlir/Tools/PDLL/ODS/Context.h +mlir/include/mlir/Tools/PDLL/ODS/Dialect.h +mlir/include/mlir/Tools/PDLL/ODS/Operation.h mlir/include/mlir/Tools/PDLL/Parser/Parser.h -mlir/include/mlir/Transforms/BufferUtils.h +mlir/include/mlir/Transforms/ControlFlowSinkUtils.h mlir/include/mlir/Transforms/DialectConversion.h mlir/include/mlir/Transforms/GreedyPatternRewriteDriver.h mlir/include/mlir/Transforms/InliningUtils.h mlir/include/mlir/Transforms/LocationSnapshot.h -mlir/include/mlir/Transforms/LoopFusionUtils.h -mlir/include/mlir/Transforms/LoopUtils.h mlir/include/mlir/Transforms/Passes.h mlir/include/mlir/Transforms/RegionUtils.h -mlir/include/mlir/Transforms/Utils.h mlir/include/mlir-c/AffineExpr.h mlir/include/mlir-c/AffineMap.h mlir/include/mlir-c/BuiltinAttributes.h @@ -7481,44 +7913,44 @@ mlir/include/mlir-c/Transforms.h mlir/include/mlir-c/Bindings/Python/Interop.h mlir/include/mlir-c/Dialect/Async.h +mlir/include/mlir-c/Dialect/Func.h mlir/include/mlir-c/Dialect/GPU.h mlir/include/mlir-c/Dialect/Linalg.h mlir/include/mlir-c/Dialect/LLVM.h +mlir/include/mlir-c/Dialect/PDL.h +mlir/include/mlir-c/Dialect/Quant.h mlir/include/mlir-c/Dialect/SCF.h mlir/include/mlir-c/Dialect/Shape.h mlir/include/mlir-c/Dialect/SparseTensor.h -mlir/include/mlir-c/Dialect/Standard.h mlir/include/mlir-c/Dialect/Tensor.h -mlir/lib/Analysis/AffineAnalysis.cpp -mlir/lib/Analysis/AffineStructures.cpp mlir/lib/Analysis/AliasAnalysis.cpp mlir/lib/Analysis/BufferViewFlowAnalysis.cpp mlir/lib/Analysis/CallGraph.cpp mlir/lib/Analysis/DataFlowAnalysis.cpp mlir/lib/Analysis/DataLayoutAnalysis.cpp mlir/lib/Analysis/Liveness.cpp -mlir/lib/Analysis/LoopAnalysis.cpp -mlir/lib/Analysis/NestedMatcher.cpp -mlir/lib/Analysis/NumberOfExecutions.cpp -mlir/lib/Analysis/PresburgerSet.cpp 
mlir/lib/Analysis/SliceAnalysis.cpp -mlir/lib/Analysis/Utils.cpp mlir/lib/Analysis/AliasAnalysis/LocalAliasAnalysis.cpp -mlir/lib/Analysis/Presburger/IntegerPolyhedron.cpp +mlir/lib/Analysis/Presburger/IntegerRelation.cpp mlir/lib/Analysis/Presburger/LinearTransform.cpp mlir/lib/Analysis/Presburger/Matrix.cpp +mlir/lib/Analysis/Presburger/PresburgerSet.cpp +mlir/lib/Analysis/Presburger/PresburgerSpace.cpp +mlir/lib/Analysis/Presburger/PWMAFunction.cpp mlir/lib/Analysis/Presburger/Simplex.cpp mlir/lib/Analysis/Presburger/Utils.cpp mlir/lib/Bindings/Python/AllPassesRegistration.cpp mlir/lib/Bindings/Python/AsyncPasses.cpp mlir/lib/Bindings/Python/DialectLinalg.cpp -mlir/lib/Bindings/Python/Dialects.h +mlir/lib/Bindings/Python/DialectPDL.cpp +mlir/lib/Bindings/Python/DialectQuant.cpp mlir/lib/Bindings/Python/DialectSparseTensor.cpp mlir/lib/Bindings/Python/ExecutionEngineModule.cpp mlir/lib/Bindings/Python/Globals.h mlir/lib/Bindings/Python/GPUPasses.cpp mlir/lib/Bindings/Python/IRAffine.cpp mlir/lib/Bindings/Python/IRAttributes.cpp +mlir/lib/Bindings/Python/IRCore.cpp mlir/lib/Bindings/Python/IRInterfaces.cpp mlir/lib/Bindings/Python/IRModule.cpp mlir/lib/Bindings/Python/IRModule.h @@ -7536,16 +7968,18 @@ mlir/lib/CAPI/Debug/Debug.cpp mlir/lib/CAPI/Dialect/Async.cpp mlir/lib/CAPI/Dialect/AsyncPasses.cpp +mlir/lib/CAPI/Dialect/Func.cpp mlir/lib/CAPI/Dialect/GPU.cpp mlir/lib/CAPI/Dialect/GPUPasses.cpp mlir/lib/CAPI/Dialect/Linalg.cpp mlir/lib/CAPI/Dialect/LinalgPasses.cpp mlir/lib/CAPI/Dialect/LLVM.cpp +mlir/lib/CAPI/Dialect/PDL.cpp +mlir/lib/CAPI/Dialect/Quant.cpp mlir/lib/CAPI/Dialect/SCF.cpp mlir/lib/CAPI/Dialect/Shape.cpp mlir/lib/CAPI/Dialect/SparseTensor.cpp mlir/lib/CAPI/Dialect/SparseTensorPasses.cpp -mlir/lib/CAPI/Dialect/Standard.cpp mlir/lib/CAPI/Dialect/Tensor.cpp mlir/lib/CAPI/ExecutionEngine/ExecutionEngine.cpp mlir/lib/CAPI/Interfaces/Interfaces.cpp @@ -7569,9 +8003,13 @@ mlir/lib/Conversion/AsyncToLLVM/AsyncToLLVM.cpp 
mlir/lib/Conversion/ComplexToLLVM/ComplexToLLVM.cpp mlir/lib/Conversion/ComplexToStandard/ComplexToStandard.cpp +mlir/lib/Conversion/ControlFlowToLLVM/ControlFlowToLLVM.cpp +mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRV.cpp +mlir/lib/Conversion/ControlFlowToSPIRV/ControlFlowToSPIRVPass.cpp +mlir/lib/Conversion/FuncToSPIRV/FuncToSPIRV.cpp +mlir/lib/Conversion/FuncToSPIRV/FuncToSPIRVPass.cpp mlir/lib/Conversion/GPUCommon/GPUOpsLowering.cpp mlir/lib/Conversion/GPUCommon/GPUOpsLowering.h -mlir/lib/Conversion/GPUCommon/GPUToLLVMConversion.cpp mlir/lib/Conversion/GPUCommon/IndexIntrinsicsOpLowering.h mlir/lib/Conversion/GPUCommon/OpToFuncCallLowering.h mlir/lib/Conversion/GPUToNVVM/LowerGpuOpsToNVVMOps.cpp @@ -7583,7 +8021,6 @@ mlir/lib/Conversion/GPUToVulkan/ConvertLaunchFuncToVulkanCalls.cpp mlir/lib/Conversion/LinalgToLLVM/LinalgToLLVM.cpp mlir/lib/Conversion/LinalgToSPIRV/LinalgToSPIRVPass.cpp -mlir/lib/Conversion/LinalgToStandard/LinalgToStandard.cpp mlir/lib/Conversion/LLVMCommon/ConversionTarget.cpp mlir/lib/Conversion/LLVMCommon/LoweringOptions.cpp mlir/lib/Conversion/LLVMCommon/MemRefBuilder.cpp @@ -7611,12 +8048,12 @@ mlir/lib/Conversion/PDLToPDLInterp/RootOrdering.cpp mlir/lib/Conversion/PDLToPDLInterp/RootOrdering.h mlir/lib/Conversion/ReconcileUnrealizedCasts/ReconcileUnrealizedCasts.cpp +mlir/lib/Conversion/SCFToControlFlow/SCFToControlFlow.cpp mlir/lib/Conversion/SCFToGPU/SCFToGPU.cpp mlir/lib/Conversion/SCFToGPU/SCFToGPUPass.cpp mlir/lib/Conversion/SCFToOpenMP/SCFToOpenMP.cpp mlir/lib/Conversion/SCFToSPIRV/SCFToSPIRV.cpp mlir/lib/Conversion/SCFToSPIRV/SCFToSPIRVPass.cpp -mlir/lib/Conversion/SCFToStandard/SCFToStandard.cpp mlir/lib/Conversion/ShapeToStandard/ConvertShapeConstraints.cpp mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp mlir/lib/Conversion/SPIRVCommon/Pattern.h @@ -7624,8 +8061,8 @@ mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVM.cpp mlir/lib/Conversion/SPIRVToLLVM/SPIRVToLLVMPass.cpp 
mlir/lib/Conversion/StandardToLLVM/StandardToLLVM.cpp -mlir/lib/Conversion/StandardToSPIRV/StandardToSPIRV.cpp -mlir/lib/Conversion/StandardToSPIRV/StandardToSPIRVPass.cpp +mlir/lib/Conversion/TensorToSPIRV/TensorToSPIRV.cpp +mlir/lib/Conversion/TensorToSPIRV/TensorToSPIRVPass.cpp mlir/lib/Conversion/TosaToLinalg/TosaToLinalg.cpp mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamed.cpp mlir/lib/Conversion/TosaToLinalg/TosaToLinalgNamedPass.cpp @@ -7634,13 +8071,17 @@ mlir/lib/Conversion/TosaToSCF/TosaToSCFPass.cpp mlir/lib/Conversion/TosaToStandard/TosaToStandard.cpp mlir/lib/Conversion/TosaToStandard/TosaToStandardPass.cpp -mlir/lib/Conversion/VectorToGPU/VectorToGPU.cpp mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVMPass.cpp mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp mlir/lib/Conversion/VectorToSCF/VectorToSCF.cpp mlir/lib/Conversion/VectorToSPIRV/VectorToSPIRVPass.cpp mlir/lib/Dialect/Traits.cpp +mlir/lib/Dialect/Affine/Analysis/AffineAnalysis.cpp +mlir/lib/Dialect/Affine/Analysis/AffineStructures.cpp +mlir/lib/Dialect/Affine/Analysis/LoopAnalysis.cpp +mlir/lib/Dialect/Affine/Analysis/NestedMatcher.cpp +mlir/lib/Dialect/Affine/Analysis/Utils.cpp mlir/lib/Dialect/Affine/IR/AffineMemoryOpInterfaces.cpp mlir/lib/Dialect/Affine/IR/AffineValueMap.cpp mlir/lib/Dialect/Affine/Transforms/AffineDataCopyGeneration.cpp @@ -7648,20 +8089,25 @@ mlir/lib/Dialect/Affine/Transforms/AffineLoopNormalize.cpp mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp mlir/lib/Dialect/Affine/Transforms/AffineScalarReplacement.cpp +mlir/lib/Dialect/Affine/Transforms/LoopCoalescing.cpp +mlir/lib/Dialect/Affine/Transforms/LoopFusion.cpp mlir/lib/Dialect/Affine/Transforms/LoopTiling.cpp mlir/lib/Dialect/Affine/Transforms/LoopUnroll.cpp mlir/lib/Dialect/Affine/Transforms/LoopUnrollAndJam.cpp mlir/lib/Dialect/Affine/Transforms/PassDetail.h +mlir/lib/Dialect/Affine/Transforms/PipelineDataTransfer.cpp 
mlir/lib/Dialect/Affine/Transforms/SimplifyAffineStructures.cpp mlir/lib/Dialect/Affine/Transforms/SuperVectorize.cpp +mlir/lib/Dialect/Affine/Utils/LoopFusionUtils.cpp +mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp mlir/lib/Dialect/Affine/Utils/Utils.cpp mlir/lib/Dialect/AMX/IR/AMXDialect.cpp mlir/lib/Dialect/AMX/Transforms/LegalizeForLLVMExport.cpp mlir/lib/Dialect/Arithmetic/IR/ArithmeticDialect.cpp -mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp +mlir/lib/Dialect/Arithmetic/Transforms/BufferizableOpInterfaceImpl.cpp mlir/lib/Dialect/Arithmetic/Transforms/Bufferize.cpp -mlir/lib/Dialect/Arithmetic/Transforms/ExpandOps.cpp mlir/lib/Dialect/Arithmetic/Transforms/PassDetail.h +mlir/lib/Dialect/Arithmetic/Utils/Utils.cpp mlir/lib/Dialect/ArmNeon/IR/ArmNeonDialect.cpp mlir/lib/Dialect/ArmSVE/IR/ArmSVEDialect.cpp mlir/lib/Dialect/ArmSVE/Transforms/LegalizeForLLVMExport.cpp @@ -7673,16 +8119,27 @@ mlir/lib/Dialect/Async/Transforms/PassDetail.cpp mlir/lib/Dialect/Async/Transforms/PassDetail.h mlir/lib/Dialect/Bufferization/IR/AllocationOpInterface.cpp +mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp mlir/lib/Dialect/Bufferization/IR/BufferizationDialect.cpp mlir/lib/Dialect/Bufferization/IR/BufferizationOps.cpp mlir/lib/Dialect/Bufferization/Transforms/BufferDeallocation.cpp mlir/lib/Dialect/Bufferization/Transforms/Bufferize.cpp +mlir/lib/Dialect/Bufferization/Transforms/BufferOptimizations.cpp +mlir/lib/Dialect/Bufferization/Transforms/BufferResultsToOutParams.cpp +mlir/lib/Dialect/Bufferization/Transforms/BufferUtils.cpp +mlir/lib/Dialect/Bufferization/Transforms/OneShotAnalysis.cpp mlir/lib/Dialect/Bufferization/Transforms/PassDetail.h mlir/lib/Dialect/Complex/IR/ComplexDialect.cpp mlir/lib/Dialect/Complex/IR/ComplexOps.cpp +mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp mlir/lib/Dialect/DLTI/DLTI.cpp mlir/lib/Dialect/DLTI/Traits.cpp mlir/lib/Dialect/EmitC/IR/EmitC.cpp +mlir/lib/Dialect/Func/IR/FuncOps.cpp 
+mlir/lib/Dialect/Func/Transforms/DecomposeCallGraphTypes.cpp +mlir/lib/Dialect/Func/Transforms/FuncBufferize.cpp +mlir/lib/Dialect/Func/Transforms/FuncConversions.cpp +mlir/lib/Dialect/Func/Transforms/PassDetail.h mlir/lib/Dialect/GPU/IR/GPUDialect.cpp mlir/lib/Dialect/GPU/Transforms/AllReduceLowering.cpp mlir/lib/Dialect/GPU/Transforms/AsyncRegionRewriter.cpp @@ -7693,22 +8150,15 @@ mlir/lib/Dialect/GPU/Transforms/SerializeToBlob.cpp mlir/lib/Dialect/Linalg/Analysis/DependenceAnalysis.cpp mlir/lib/Dialect/Linalg/ComprehensiveBufferize/AffineInterfaceImpl.cpp -mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ArithInterfaceImpl.cpp -mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizableOpInterface.cpp -mlir/lib/Dialect/Linalg/ComprehensiveBufferize/BufferizationInterfaceImpl.cpp -mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ComprehensiveBufferize.cpp -mlir/lib/Dialect/Linalg/ComprehensiveBufferize/LinalgInterfaceImpl.cpp -mlir/lib/Dialect/Linalg/ComprehensiveBufferize/SCFInterfaceImpl.cpp -mlir/lib/Dialect/Linalg/ComprehensiveBufferize/TensorInterfaceImpl.cpp -mlir/lib/Dialect/Linalg/ComprehensiveBufferize/VectorInterfaceImpl.cpp +mlir/lib/Dialect/Linalg/ComprehensiveBufferize/ModuleBufferization.cpp mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp mlir/lib/Dialect/Linalg/IR/LinalgInterfaces.cpp mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp mlir/lib/Dialect/Linalg/Transforms/Bufferize.cpp mlir/lib/Dialect/Linalg/Transforms/CodegenStrategy.cpp mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp mlir/lib/Dialect/Linalg/Transforms/Detensorize.cpp -mlir/lib/Dialect/Linalg/Transforms/Distribution.cpp mlir/lib/Dialect/Linalg/Transforms/DropUnitDims.cpp mlir/lib/Dialect/Linalg/Transforms/ElementwiseOpFusion.cpp mlir/lib/Dialect/Linalg/Transforms/ElementwiseToLinalg.cpp @@ -7722,16 +8172,16 @@ mlir/lib/Dialect/Linalg/Transforms/LinalgStrategyPasses.cpp mlir/lib/Dialect/Linalg/Transforms/Loops.cpp 
mlir/lib/Dialect/Linalg/Transforms/NamedOpConversions.cpp +mlir/lib/Dialect/Linalg/Transforms/PadOpInterchange.cpp mlir/lib/Dialect/Linalg/Transforms/PassDetail.h mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp +mlir/lib/Dialect/Linalg/Transforms/SparseTensorRewriting.cpp mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp mlir/lib/Dialect/Linalg/Transforms/Transforms.cpp mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp mlir/lib/Dialect/Linalg/Utils/Utils.cpp mlir/lib/Dialect/LLVMIR/IR/FunctionCallUtils.cpp -mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp mlir/lib/Dialect/LLVMIR/IR/LLVMTypes.cpp -mlir/lib/Dialect/LLVMIR/IR/LLVMTypeSyntax.cpp mlir/lib/Dialect/LLVMIR/IR/NVVMDialect.cpp mlir/lib/Dialect/LLVMIR/IR/ROCDLDialect.cpp mlir/lib/Dialect/LLVMIR/IR/TypeDetail.h @@ -7744,7 +8194,11 @@ mlir/lib/Dialect/Math/Transforms/PolynomialApproximation.cpp mlir/lib/Dialect/MemRef/IR/MemRefDialect.cpp mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp +mlir/lib/Dialect/MemRef/Transforms/ExpandOps.cpp mlir/lib/Dialect/MemRef/Transforms/FoldSubViewOps.cpp +mlir/lib/Dialect/MemRef/Transforms/MultiBuffer.cpp +mlir/lib/Dialect/MemRef/Transforms/NormalizeMemRefs.cpp +mlir/lib/Dialect/MemRef/Transforms/PassDetail.h mlir/lib/Dialect/MemRef/Transforms/ResolveShapedTypeResultDims.cpp mlir/lib/Dialect/MemRef/Utils/MemRefUtils.cpp mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp @@ -7763,17 +8217,19 @@ mlir/lib/Dialect/Quant/Utils/QuantizeUtils.cpp mlir/lib/Dialect/Quant/Utils/UniformSupport.cpp mlir/lib/Dialect/SCF/SCF.cpp -mlir/lib/Dialect/SCF/Transforms/AffineCanonicalizationUtils.cpp +mlir/lib/Dialect/SCF/Transforms/BufferizableOpInterfaceImpl.cpp mlir/lib/Dialect/SCF/Transforms/Bufferize.cpp mlir/lib/Dialect/SCF/Transforms/ForToWhile.cpp mlir/lib/Dialect/SCF/Transforms/LoopPipelining.cpp mlir/lib/Dialect/SCF/Transforms/LoopRangeFolding.cpp mlir/lib/Dialect/SCF/Transforms/LoopSpecialization.cpp +mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp 
mlir/lib/Dialect/SCF/Transforms/ParallelLoopFusion.cpp mlir/lib/Dialect/SCF/Transforms/ParallelLoopTiling.cpp mlir/lib/Dialect/SCF/Transforms/PassDetail.h mlir/lib/Dialect/SCF/Transforms/StructuralTypeConversions.cpp -mlir/lib/Dialect/SCF/Transforms/Utils.cpp +mlir/lib/Dialect/SCF/Utils/AffineCanonicalizationUtils.cpp +mlir/lib/Dialect/SCF/Utils/Utils.cpp mlir/lib/Dialect/Shape/IR/Shape.cpp mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp mlir/lib/Dialect/Shape/Transforms/PassDetail.h @@ -7781,9 +8237,11 @@ mlir/lib/Dialect/Shape/Transforms/ShapeToShapeLowering.cpp mlir/lib/Dialect/Shape/Transforms/StructuralTypeConversions.cpp mlir/lib/Dialect/SparseTensor/IR/SparseTensorDialect.cpp +mlir/lib/Dialect/SparseTensor/Pipelines/SparseTensorPipelines.cpp +mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.cpp +mlir/lib/Dialect/SparseTensor/Transforms/CodegenUtils.h mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorConversion.cpp mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp -mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp mlir/lib/Dialect/SparseTensor/Utils/Merger.cpp mlir/lib/Dialect/SPIRV/IR/SPIRVAttributes.cpp mlir/lib/Dialect/SPIRV/IR/SPIRVCanonicalization.cpp @@ -7796,46 +8254,50 @@ mlir/lib/Dialect/SPIRV/Transforms/LowerABIAttributesPass.cpp mlir/lib/Dialect/SPIRV/Transforms/PassDetail.h mlir/lib/Dialect/SPIRV/Transforms/RewriteInsertsPass.cpp +mlir/lib/Dialect/SPIRV/Transforms/UnifyAliasedResourcePass.cpp mlir/lib/Dialect/SPIRV/Transforms/UpdateVCEPass.cpp mlir/lib/Dialect/SPIRV/Utils/LayoutUtils.cpp -mlir/lib/Dialect/StandardOps/IR/Ops.cpp -mlir/lib/Dialect/StandardOps/Transforms/Bufferize.cpp -mlir/lib/Dialect/StandardOps/Transforms/ComposeSubView.cpp -mlir/lib/Dialect/StandardOps/Transforms/DecomposeCallGraphTypes.cpp -mlir/lib/Dialect/StandardOps/Transforms/ExpandOps.cpp -mlir/lib/Dialect/StandardOps/Transforms/FuncBufferize.cpp -mlir/lib/Dialect/StandardOps/Transforms/FuncConversions.cpp 
-mlir/lib/Dialect/StandardOps/Transforms/PassDetail.h -mlir/lib/Dialect/StandardOps/Transforms/TensorConstantBufferize.cpp -mlir/lib/Dialect/StandardOps/Utils/Utils.cpp mlir/lib/Dialect/Tensor/IR/TensorDialect.cpp mlir/lib/Dialect/Tensor/IR/TensorInferTypeOpInterfaceImpl.cpp mlir/lib/Dialect/Tensor/IR/TensorOps.cpp +mlir/lib/Dialect/Tensor/IR/TensorTilingInterfaceImpl.cpp +mlir/lib/Dialect/Tensor/Transforms/BufferizableOpInterfaceImpl.cpp mlir/lib/Dialect/Tensor/Transforms/Bufferize.cpp mlir/lib/Dialect/Tensor/Transforms/PassDetail.h +mlir/lib/Dialect/Tensor/Transforms/SplitPadding.cpp +mlir/lib/Dialect/Tensor/Utils/Utils.cpp mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeConv2D.cpp +mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeDepthwise.cpp mlir/lib/Dialect/Tosa/Transforms/TosaDecomposeTransposeConv.cpp mlir/lib/Dialect/Tosa/Transforms/TosaInferShapes.cpp mlir/lib/Dialect/Tosa/Transforms/TosaMakeBroadcastable.cpp -mlir/lib/Dialect/Tosa/Transforms/TosaOptimization.cpp +mlir/lib/Dialect/Tosa/Transforms/TosaOptionalDecompositions.cpp +mlir/lib/Dialect/Tosa/Utils/ConversionUtils.cpp mlir/lib/Dialect/Tosa/Utils/QuantUtils.cpp +mlir/lib/Dialect/Utils/IndexingUtils.cpp mlir/lib/Dialect/Utils/ReshapeOpsUtils.cpp mlir/lib/Dialect/Utils/StaticValueUtils.cpp mlir/lib/Dialect/Utils/StructuredOpsUtils.cpp -mlir/lib/Dialect/Vector/VectorDropLeadUnitDim.cpp -mlir/lib/Dialect/Vector/VectorInsertExtractStridedSliceRewritePatterns.cpp -mlir/lib/Dialect/Vector/VectorMultiDimReductionTransforms.cpp -mlir/lib/Dialect/Vector/VectorOps.cpp -mlir/lib/Dialect/Vector/VectorTransferOpTransforms.cpp -mlir/lib/Dialect/Vector/VectorTransferPermutationMapRewritePatterns.cpp -mlir/lib/Dialect/Vector/VectorUnrollDistribute.cpp -mlir/lib/Dialect/Vector/VectorUtils.cpp +mlir/lib/Dialect/Vector/Transforms/BufferizableOpInterfaceImpl.cpp +mlir/lib/Dialect/Vector/Transforms/Bufferize.cpp +mlir/lib/Dialect/Vector/Transforms/PassDetail.h 
+mlir/lib/Dialect/Vector/Transforms/VectorDropLeadUnitDim.cpp +mlir/lib/Dialect/Vector/Transforms/VectorInsertExtractStridedSliceRewritePatterns.cpp +mlir/lib/Dialect/Vector/Transforms/VectorMultiDimReductionTransforms.cpp +mlir/lib/Dialect/Vector/Transforms/VectorTransferOpTransforms.cpp +mlir/lib/Dialect/Vector/Transforms/VectorTransferPermutationMapRewritePatterns.cpp +mlir/lib/Dialect/Vector/Transforms/VectorTransferSplitRewritePatterns.cpp +mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp +mlir/lib/Dialect/Vector/Transforms/VectorUnrollDistribute.cpp +mlir/lib/Dialect/Vector/Utils/VectorUtils.cpp mlir/lib/Dialect/X86Vector/IR/X86VectorDialect.cpp mlir/lib/Dialect/X86Vector/Transforms/AVXTranspose.cpp mlir/lib/Dialect/X86Vector/Transforms/LegalizeForLLVMExport.cpp mlir/lib/ExecutionEngine/AsyncRuntime.cpp mlir/lib/ExecutionEngine/CRunnerUtils.cpp mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp +mlir/lib/ExecutionEngine/ExecutionEngine.cpp mlir/lib/ExecutionEngine/JitRunner.cpp mlir/lib/ExecutionEngine/OptUtils.cpp mlir/lib/ExecutionEngine/RocmRuntimeWrappers.cpp @@ -7856,25 +8318,22 @@ mlir/lib/IR/AffineExprDetail.h mlir/lib/IR/AffineMap.cpp mlir/lib/IR/AffineMapDetail.h -mlir/lib/IR/AsmPrinter.cpp mlir/lib/IR/AttributeDetail.h mlir/lib/IR/Attributes.cpp mlir/lib/IR/Builders.cpp mlir/lib/IR/BuiltinAttributeInterfaces.cpp mlir/lib/IR/BuiltinAttributes.cpp mlir/lib/IR/BuiltinDialect.cpp +mlir/lib/IR/BuiltinTypeInterfaces.cpp mlir/lib/IR/BuiltinTypes.cpp -mlir/lib/IR/Diagnostics.cpp mlir/lib/IR/Dialect.cpp mlir/lib/IR/Dominance.cpp mlir/lib/IR/FunctionImplementation.cpp -mlir/lib/IR/FunctionSupport.cpp mlir/lib/IR/IntegerSet.cpp mlir/lib/IR/IntegerSetDetail.h mlir/lib/IR/Location.cpp mlir/lib/IR/MLIRContext.cpp mlir/lib/IR/Operation.cpp -mlir/lib/IR/OperationSupport.cpp mlir/lib/IR/PatternMatch.cpp mlir/lib/IR/Region.cpp mlir/lib/IR/RegionKindInterface.cpp @@ -7888,10 +8347,6 @@ mlir/lib/IR/Verifier.cpp mlir/lib/IR/Visitors.cpp 
mlir/lib/Parser/AffineParser.cpp -mlir/lib/Parser/AsmParserImpl.h -mlir/lib/Parser/AsmParserState.cpp -mlir/lib/Parser/AttributeParser.cpp -mlir/lib/Parser/DialectSymbolParser.cpp mlir/lib/Parser/Lexer.cpp mlir/lib/Parser/Lexer.h mlir/lib/Parser/LocationParser.cpp @@ -7946,7 +8401,6 @@ mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp mlir/lib/Target/LLVMIR/DebugTranslation.cpp mlir/lib/Target/LLVMIR/DebugTranslation.h -mlir/lib/Target/LLVMIR/ModuleTranslation.cpp mlir/lib/Target/LLVMIR/TypeFromLLVM.cpp mlir/lib/Target/LLVMIR/TypeToLLVM.cpp mlir/lib/Target/LLVMIR/Dialect/AMX/AMXToLLVMIRTranslation.cpp @@ -7968,7 +8422,6 @@ mlir/lib/Tools/mlir-lsp-server/LSPServer.cpp mlir/lib/Tools/mlir-lsp-server/LSPServer.h mlir/lib/Tools/mlir-lsp-server/MlirLspServerMain.cpp -mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp mlir/lib/Tools/mlir-lsp-server/MLIRServer.h mlir/lib/Tools/mlir-lsp-server/lsp/Logging.cpp mlir/lib/Tools/mlir-lsp-server/lsp/Protocol.cpp @@ -7980,33 +8433,31 @@ mlir/lib/Tools/PDLL/AST/NodePrinter.cpp mlir/lib/Tools/PDLL/AST/TypeDetail.h mlir/lib/Tools/PDLL/AST/Types.cpp +mlir/lib/Tools/PDLL/CodeGen/CPPGen.cpp +mlir/lib/Tools/PDLL/ODS/Context.cpp +mlir/lib/Tools/PDLL/ODS/Dialect.cpp +mlir/lib/Tools/PDLL/ODS/Operation.cpp mlir/lib/Tools/PDLL/Parser/Parser.cpp -mlir/lib/Transforms/BufferOptimizations.cpp -mlir/lib/Transforms/BufferResultsToOutParams.cpp -mlir/lib/Transforms/BufferUtils.cpp mlir/lib/Transforms/Canonicalizer.cpp +mlir/lib/Transforms/ControlFlowSink.cpp mlir/lib/Transforms/CSE.cpp mlir/lib/Transforms/Inliner.cpp mlir/lib/Transforms/LocationSnapshot.cpp -mlir/lib/Transforms/LoopCoalescing.cpp -mlir/lib/Transforms/LoopFusion.cpp -mlir/lib/Transforms/NormalizeMemRefs.cpp -mlir/lib/Transforms/ParallelLoopCollapsing.cpp +mlir/lib/Transforms/LoopInvariantCodeMotion.cpp mlir/lib/Transforms/PassDetail.h -mlir/lib/Transforms/PipelineDataTransfer.cpp mlir/lib/Transforms/SCCP.cpp mlir/lib/Transforms/StripDebugInfo.cpp mlir/lib/Transforms/SymbolDCE.cpp 
+mlir/lib/Transforms/SymbolPrivatize.cpp +mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp mlir/lib/Transforms/Utils/DialectConversion.cpp mlir/lib/Transforms/Utils/FoldUtils.cpp mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp mlir/lib/Transforms/Utils/InliningUtils.cpp -mlir/lib/Transforms/Utils/LoopFusionUtils.cpp -mlir/lib/Transforms/Utils/LoopUtils.cpp mlir/lib/Transforms/Utils/RegionUtils.cpp -mlir/lib/Transforms/Utils/Utils.cpp mlir/lib/Translation/Translation.cpp mlir/tools/mlir-cpu-runner/mlir-cpu-runner.cpp +mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp mlir/tools/mlir-lsp-server/mlir-lsp-server.cpp mlir/tools/mlir-opt/mlir-opt.cpp mlir/tools/mlir-pdll/mlir-pdll.cpp @@ -8046,19 +8497,20 @@ mlir/tools/mlir-vulkan-runner/vulkan-runtime-wrappers.cpp mlir/tools/mlir-vulkan-runner/VulkanRuntime.cpp mlir/tools/mlir-vulkan-runner/VulkanRuntime.h -mlir/unittests/Analysis/AffineStructuresParser.cpp -mlir/unittests/Analysis/AffineStructuresParser.h -mlir/unittests/Analysis/AffineStructuresParserTest.cpp -mlir/unittests/Analysis/AffineStructuresTest.cpp -mlir/unittests/Analysis/PresburgerSetTest.cpp mlir/unittests/Analysis/Presburger/IntegerPolyhedronTest.cpp mlir/unittests/Analysis/Presburger/LinearTransformTest.cpp mlir/unittests/Analysis/Presburger/MatrixTest.cpp +mlir/unittests/Analysis/Presburger/PresburgerSetTest.cpp +mlir/unittests/Analysis/Presburger/PresburgerSpaceTest.cpp +mlir/unittests/Analysis/Presburger/PWMAFunctionTest.cpp mlir/unittests/Analysis/Presburger/SimplexTest.cpp +mlir/unittests/Analysis/Presburger/Utils.h mlir/unittests/Conversion/PDLToPDLInterp/RootOrderingTest.cpp mlir/unittests/Dialect/BroadcastShapeTest.cpp +mlir/unittests/Dialect/Affine/Analysis/AffineStructuresParser.cpp +mlir/unittests/Dialect/Affine/Analysis/AffineStructuresParser.h +mlir/unittests/Dialect/Affine/Analysis/AffineStructuresParserTest.cpp mlir/unittests/Dialect/Quant/QuantizationUtilsTest.cpp -mlir/unittests/Dialect/SCF/SCFOps.cpp 
mlir/unittests/Dialect/SparseTensor/MergerTest.cpp mlir/unittests/Dialect/SPIRV/DeserializationTest.cpp mlir/unittests/Dialect/SPIRV/SerializationTest.cpp @@ -8076,6 +8528,7 @@ mlir/unittests/IR/SubElementInterfaceTest.cpp mlir/unittests/Pass/AnalysisManagerTest.cpp mlir/unittests/Pass/PassManagerTest.cpp +mlir/unittests/Pass/PassPipelineParserTest.cpp mlir/unittests/Rewrite/PatternBenefit.cpp mlir/unittests/Support/DebugCounterTest.cpp mlir/unittests/Support/IndentedOstreamTest.cpp @@ -8105,7 +8558,6 @@ openmp/libomptarget/DeviceRTL/include/Utils.h openmp/libomptarget/DeviceRTL/src/Configuration.cpp openmp/libomptarget/DeviceRTL/src/Kernel.cpp -openmp/libomptarget/DeviceRTL/src/Mapping.cpp openmp/libomptarget/DeviceRTL/src/Misc.cpp openmp/libomptarget/DeviceRTL/src/Parallelism.cpp openmp/libomptarget/DeviceRTL/src/Reduction.cpp @@ -8113,25 +8565,10 @@ openmp/libomptarget/DeviceRTL/src/Synchronization.cpp openmp/libomptarget/DeviceRTL/src/Tasking.cpp openmp/libomptarget/DeviceRTL/src/Utils.cpp -openmp/libomptarget/deviceRTLs/interface.h -openmp/libomptarget/deviceRTLs/target_interface.h -openmp/libomptarget/deviceRTLs/amdgcn/src/amdgcn_interface.h -openmp/libomptarget/deviceRTLs/amdgcn/src/target_impl.h -openmp/libomptarget/deviceRTLs/common/allocator.h -openmp/libomptarget/deviceRTLs/common/debug.h -openmp/libomptarget/deviceRTLs/common/omptarget.h -openmp/libomptarget/deviceRTLs/common/omptargeti.h -openmp/libomptarget/deviceRTLs/common/state-queue.h -openmp/libomptarget/deviceRTLs/common/state-queuei.h -openmp/libomptarget/deviceRTLs/common/include/target.h -openmp/libomptarget/deviceRTLs/common/include/target/shuffle.h -openmp/libomptarget/deviceRTLs/common/src/shuffle.cpp -openmp/libomptarget/deviceRTLs/nvptx/src/nvptx_interface.h -openmp/libomptarget/deviceRTLs/nvptx/src/target_impl.h openmp/libomptarget/include/Debug.h openmp/libomptarget/include/device.h openmp/libomptarget/include/DeviceEnvironment.h -openmp/libomptarget/include/dlwrap.h 
+openmp/libomptarget/include/interop.h openmp/libomptarget/include/omptarget.h openmp/libomptarget/include/omptargetplugin.h openmp/libomptarget/include/rtl.h @@ -8154,10 +8591,8 @@ openmp/libomptarget/plugins/common/MemoryManager/MemoryManager.h openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.cpp openmp/libomptarget/plugins/cuda/dynamic_cuda/cuda.h -openmp/libomptarget/plugins/cuda/src/rtl.cpp openmp/libomptarget/plugins/generic-elf-64bit/src/rtl.cpp openmp/libomptarget/plugins/remote/include/Utils.h -openmp/libomptarget/plugins/remote/lib/Utils.cpp openmp/libomptarget/plugins/remote/server/OffloadingServer.cpp openmp/libomptarget/plugins/remote/server/Server.cpp openmp/libomptarget/plugins/remote/server/Server.h @@ -8166,6 +8601,8 @@ openmp/libomptarget/plugins/ve/src/rtl.cpp openmp/libomptarget/src/api.cpp openmp/libomptarget/src/interface.cpp +openmp/libomptarget/src/interop.cpp +openmp/libomptarget/src/omptarget.cpp openmp/libomptarget/src/private.h openmp/libomptarget/src/rtl.cpp openmp/libomptarget/tools/deviceinfo/llvm-omp-device-info.cpp @@ -8202,7 +8639,6 @@ openmp/runtime/src/kmp_itt.cpp openmp/runtime/src/kmp_itt.h openmp/runtime/src/kmp_lock.cpp -openmp/runtime/src/kmp_lock.h openmp/runtime/src/kmp_omp.h openmp/runtime/src/kmp_platform.h openmp/runtime/src/kmp_safe_c_api.h @@ -8373,9 +8809,12 @@ polly/unittests/Support/ISLTools.cpp pstl/include/pstl/internal/algorithm_fwd.h pstl/include/pstl/internal/execution_defs.h +pstl/include/pstl/internal/execution_impl.h pstl/include/pstl/internal/glue_algorithm_defs.h +pstl/include/pstl/internal/glue_algorithm_impl.h pstl/include/pstl/internal/glue_execution_defs.h pstl/include/pstl/internal/glue_memory_defs.h +pstl/include/pstl/internal/glue_memory_impl.h pstl/include/pstl/internal/glue_numeric_defs.h pstl/include/pstl/internal/glue_numeric_impl.h pstl/include/pstl/internal/numeric_fwd.h @@ -8383,6 +8822,7 @@ pstl/include/pstl/internal/parallel_backend_omp.h 
pstl/include/pstl/internal/parallel_backend_serial.h pstl/include/pstl/internal/parallel_backend_utils.h +pstl/include/pstl/internal/parallel_impl.h pstl/include/pstl/internal/omp/parallel_for.h pstl/include/pstl/internal/omp/parallel_for_each.h pstl/include/pstl/internal/omp/parallel_invoke.h diff --git a/clang/include/clang/Basic/Module.h b/clang/include/clang/Basic/Module.h --- a/clang/include/clang/Basic/Module.h +++ b/clang/include/clang/Basic/Module.h @@ -371,6 +371,10 @@ /// The set of use declarations that have yet to be resolved. SmallVector UnresolvedDirectUses; + /// When \c NoUndeclaredIncludes is true, the set of modules this module tried + /// to import but didn't because they are not direct uses. + llvm::SmallSetVector UndeclaredUses; + /// A library or framework to link against when an entity from this /// module is used. struct LinkLibrary { @@ -601,7 +605,7 @@ /// Determine whether this module has declared its intention to /// directly use another module. - bool directlyUses(const Module *Requested) const; + bool directlyUses(const Module *Requested); /// Add the given feature requirement to the list of features /// required by this module. diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -2686,34 +2686,30 @@ /// Such situations should use the specific attribute parsing functionality. 
void ParseAttributes(unsigned WhichAttrKinds, ParsedAttributesWithRange &Attrs, - SourceLocation *End = nullptr, LateParsedAttrList *LateAttrs = nullptr); void ParseAttributes(unsigned WhichAttrKinds, ParsedAttributes &Attrs, - SourceLocation *End = nullptr, LateParsedAttrList *LateAttrs = nullptr) { ParsedAttributesWithRange AttrsWithRange(AttrFactory); - ParseAttributes(WhichAttrKinds, AttrsWithRange, End, LateAttrs); + ParseAttributes(WhichAttrKinds, AttrsWithRange, LateAttrs); Attrs.takeAllFrom(AttrsWithRange); } /// \brief Possibly parse attributes based on what syntaxes are desired, /// allowing for the order to vary. bool MaybeParseAttributes(unsigned WhichAttrKinds, ParsedAttributesWithRange &Attrs, - SourceLocation *End = nullptr, LateParsedAttrList *LateAttrs = nullptr) { if (Tok.isOneOf(tok::kw___attribute, tok::kw___declspec) || (standardAttributesAllowed() && isCXX11AttributeSpecifier())) { - ParseAttributes(WhichAttrKinds, Attrs, End, LateAttrs); + ParseAttributes(WhichAttrKinds, Attrs, LateAttrs); return true; } return false; } bool MaybeParseAttributes(unsigned WhichAttrKinds, ParsedAttributes &Attrs, - SourceLocation *End = nullptr, LateParsedAttrList *LateAttrs = nullptr) { if (Tok.isOneOf(tok::kw___attribute, tok::kw___declspec) || (standardAttributesAllowed() && isCXX11AttributeSpecifier())) { - ParseAttributes(WhichAttrKinds, Attrs, End, LateAttrs); + ParseAttributes(WhichAttrKinds, Attrs, LateAttrs); return true; } return false; @@ -2722,10 +2718,9 @@ void MaybeParseGNUAttributes(Declarator &D, LateParsedAttrList *LateAttrs = nullptr) { if (Tok.is(tok::kw___attribute)) { - ParsedAttributes attrs(AttrFactory); - SourceLocation endLoc; - ParseGNUAttributes(attrs, &endLoc, LateAttrs, &D); - D.takeAttributes(attrs, endLoc); + ParsedAttributesWithRange attrs(AttrFactory); + ParseGNUAttributes(attrs, LateAttrs, &D); + D.takeAttributes(attrs, attrs.Range.getEnd()); } } @@ -2735,11 +2730,10 @@ /// This API is discouraged. 
Use the version that takes a /// ParsedAttributesWithRange instead. bool MaybeParseGNUAttributes(ParsedAttributes &Attrs, - SourceLocation *EndLoc = nullptr, LateParsedAttrList *LateAttrs = nullptr) { if (Tok.is(tok::kw___attribute)) { ParsedAttributesWithRange AttrsWithRange(AttrFactory); - ParseGNUAttributes(Attrs, EndLoc, LateAttrs); + ParseGNUAttributes(Attrs, LateAttrs); Attrs.takeAllFrom(AttrsWithRange); return true; } @@ -2747,10 +2741,9 @@ } bool MaybeParseGNUAttributes(ParsedAttributesWithRange &Attrs, - SourceLocation *EndLoc = nullptr, LateParsedAttrList *LateAttrs = nullptr) { if (Tok.is(tok::kw___attribute)) { - ParseGNUAttributes(Attrs, EndLoc, LateAttrs); + ParseGNUAttributes(Attrs, LateAttrs); return true; } return false; @@ -2762,16 +2755,14 @@ /// This API is discouraged. Use the version that takes a /// ParsedAttributesWithRange instead. void ParseGNUAttributes(ParsedAttributes &Attrs, - SourceLocation *EndLoc = nullptr, LateParsedAttrList *LateAttrs = nullptr, Declarator *D = nullptr) { ParsedAttributesWithRange AttrsWithRange(AttrFactory); - ParseGNUAttributes(AttrsWithRange, EndLoc, LateAttrs, D); + ParseGNUAttributes(AttrsWithRange, LateAttrs, D); Attrs.takeAllFrom(AttrsWithRange); } void ParseGNUAttributes(ParsedAttributesWithRange &Attrs, - SourceLocation *EndLoc = nullptr, LateParsedAttrList *LateAttrs = nullptr, Declarator *D = nullptr); void ParseGNUAttributeArgs(IdentifierInfo *AttrName, @@ -2800,27 +2791,24 @@ void MaybeParseCXX11Attributes(Declarator &D) { if (standardAttributesAllowed() && isCXX11AttributeSpecifier()) { ParsedAttributesWithRange attrs(AttrFactory); - SourceLocation endLoc; - ParseCXX11Attributes(attrs, &endLoc); - D.takeAttributes(attrs, endLoc); + ParseCXX11Attributes(attrs); + D.takeAttributes(attrs, attrs.Range.getEnd()); } } - bool MaybeParseCXX11Attributes(ParsedAttributes &attrs, - SourceLocation *endLoc = nullptr) { + bool MaybeParseCXX11Attributes(ParsedAttributes &attrs) { if (standardAttributesAllowed() && 
isCXX11AttributeSpecifier()) { ParsedAttributesWithRange attrsWithRange(AttrFactory); - ParseCXX11Attributes(attrsWithRange, endLoc); + ParseCXX11Attributes(attrsWithRange); attrs.takeAllFrom(attrsWithRange); return true; } return false; } bool MaybeParseCXX11Attributes(ParsedAttributesWithRange &attrs, - SourceLocation *endLoc = nullptr, bool OuterMightBeMessageSend = false) { if (standardAttributesAllowed() && isCXX11AttributeSpecifier(false, OuterMightBeMessageSend)) { - ParseCXX11Attributes(attrs, endLoc); + ParseCXX11Attributes(attrs); return true; } return false; @@ -2838,8 +2826,7 @@ ParseCXX11AttributeSpecifierInternal(Attrs, OpenMPTokens, EndLoc); ReplayOpenMPAttributeTokens(OpenMPTokens); } - void ParseCXX11Attributes(ParsedAttributesWithRange &attrs, - SourceLocation *EndLoc = nullptr); + void ParseCXX11Attributes(ParsedAttributesWithRange &attrs); /// Parses a C++11 (or C2x)-style attribute argument list. Returns true /// if this results in adding an attribute to the ParsedAttributes list. 
bool ParseCXX11AttributeArgs(IdentifierInfo *AttrName, @@ -2854,25 +2841,23 @@ Sema::AttributeCompletion Completion = Sema::AttributeCompletion::None, const IdentifierInfo *EnclosingScope = nullptr); - void MaybeParseMicrosoftAttributes(ParsedAttributes &attrs, - SourceLocation *endLoc = nullptr) { - if (getLangOpts().MicrosoftExt && Tok.is(tok::l_square)) - ParseMicrosoftAttributes(attrs, endLoc); + void MaybeParseMicrosoftAttributes(ParsedAttributes &Attrs) { + if (getLangOpts().MicrosoftExt && Tok.is(tok::l_square)) { + ParsedAttributesWithRange AttrsWithRange(AttrFactory); + ParseMicrosoftAttributes(AttrsWithRange); + Attrs.takeAllFrom(AttrsWithRange); + } } void ParseMicrosoftUuidAttributeArgs(ParsedAttributes &Attrs); - void ParseMicrosoftAttributes(ParsedAttributes &attrs, - SourceLocation *endLoc = nullptr); - bool MaybeParseMicrosoftDeclSpecs(ParsedAttributes &Attrs, - SourceLocation *End = nullptr) { - const auto &LO = getLangOpts(); - if (LO.DeclSpecKeyword && Tok.is(tok::kw___declspec)) { - ParseMicrosoftDeclSpecs(Attrs, End); + void ParseMicrosoftAttributes(ParsedAttributesWithRange &attrs); + bool MaybeParseMicrosoftDeclSpecs(ParsedAttributesWithRange &Attrs) { + if (getLangOpts().DeclSpecKeyword && Tok.is(tok::kw___declspec)) { + ParseMicrosoftDeclSpecs(Attrs); return true; } return false; } - void ParseMicrosoftDeclSpecs(ParsedAttributes &Attrs, - SourceLocation *End = nullptr); + void ParseMicrosoftDeclSpecs(ParsedAttributesWithRange &Attrs); bool ParseMicrosoftDeclSpecArgs(IdentifierInfo *AttrName, SourceLocation AttrNameLoc, ParsedAttributes &Attrs); diff --git a/clang/include/clang/Sema/DeclSpec.h b/clang/include/clang/Sema/DeclSpec.h --- a/clang/include/clang/Sema/DeclSpec.h +++ b/clang/include/clang/Sema/DeclSpec.h @@ -368,7 +368,7 @@ ExplicitSpecifier FS_explicit_specifier; // attributes. - ParsedAttributes Attrs; + ParsedAttributesWithRange Attrs; // Scope specifier for the type spec, if applicable. 
CXXScopeSpec TypeScope; diff --git a/clang/include/clang/Tooling/Syntax/Pseudo/Preprocess.h b/clang/include/clang/Tooling/Syntax/Pseudo/DirectiveMap.h rename from clang/include/clang/Tooling/Syntax/Pseudo/Preprocess.h rename to clang/include/clang/Tooling/Syntax/Pseudo/DirectiveMap.h --- a/clang/include/clang/Tooling/Syntax/Pseudo/Preprocess.h +++ b/clang/include/clang/Tooling/Syntax/Pseudo/DirectiveMap.h @@ -1,4 +1,4 @@ -//===--- Preprocess.h - Preprocess token streams -----------------*- C++-*-===// +//===--- DirectiveMap.h - Find and strip preprocessor directives -*- C++-*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -56,7 +56,7 @@ /// /// Unlike the clang preprocessor, we model the full tree explicitly. /// This class does not recognize macro usage, only directives. -struct PPStructure { +struct DirectiveMap { /// A range of code (and possibly comments) containing no directives. struct Code { Token::Range Tokens; @@ -76,7 +76,7 @@ /// /// The first branch will have an #if type directive. /// Subsequent branches will have #else type directives. - std::vector> Branches; + std::vector> Branches; /// The directive terminating the conditional, should be #endif. Directive End; }; @@ -86,22 +86,22 @@ std::vector Chunks; /// Extract preprocessor structure by examining the raw tokens. - static PPStructure parse(const TokenStream &); + static DirectiveMap parse(const TokenStream &); // FIXME: add heuristically selection of conditional branches. 
// FIXME: allow deriving a preprocessed stream }; -llvm::raw_ostream &operator<<(llvm::raw_ostream &, const PPStructure &); -llvm::raw_ostream &operator<<(llvm::raw_ostream &, const PPStructure::Chunk &); -llvm::raw_ostream &operator<<(llvm::raw_ostream &, const PPStructure::Code &); +llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveMap &); +llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveMap::Chunk &); +llvm::raw_ostream &operator<<(llvm::raw_ostream &, const DirectiveMap::Code &); llvm::raw_ostream &operator<<(llvm::raw_ostream &, - const PPStructure::Directive &); + const DirectiveMap::Directive &); llvm::raw_ostream &operator<<(llvm::raw_ostream &, - const PPStructure::Conditional &); + const DirectiveMap::Conditional &); // FIXME: This approximates std::variant. // Switch once we can use C++17. -class PPStructure::Chunk { +class DirectiveMap::Chunk { public: enum Kind { K_Empty, K_Code, K_Directive, K_Conditional }; Kind kind() const { diff --git a/clang/lib/Basic/Module.cpp b/clang/lib/Basic/Module.cpp --- a/clang/lib/Basic/Module.cpp +++ b/clang/lib/Basic/Module.cpp @@ -267,7 +267,7 @@ return llvm::makeArrayRef(TopHeaders.begin(), TopHeaders.end()); } -bool Module::directlyUses(const Module *Requested) const { +bool Module::directlyUses(const Module *Requested) { auto *Top = getTopLevelModule(); // A top-level module implicitly uses itself. 
@@ -282,6 +282,9 @@ if (!Requested->Parent && Requested->Name == "_Builtin_stddef_max_align_t") return true; + if (NoUndeclaredIncludes) + UndeclaredUses.insert(Requested); + return false; } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -15778,6 +15778,8 @@ return Builder.CreateTrunc(LoadIntrinsic, Int16Ty); } // FMA variations + case PPC::BI__builtin_ppc_fnmsub: + case PPC::BI__builtin_ppc_fnmsubs: case PPC::BI__builtin_vsx_xvmaddadp: case PPC::BI__builtin_vsx_xvmaddasp: case PPC::BI__builtin_vsx_xvnmaddadp: @@ -15816,6 +15818,8 @@ F, {X, Y, Builder.CreateFNeg(Z, "neg")}); else return Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}); + case PPC::BI__builtin_ppc_fnmsub: + case PPC::BI__builtin_ppc_fnmsubs: case PPC::BI__builtin_vsx_xvnmsubadp: case PPC::BI__builtin_vsx_xvnmsubasp: if (Builder.getIsFPConstrained()) @@ -15824,10 +15828,9 @@ F, {X, Y, Builder.CreateFNeg(Z, "neg")}), "neg"); else - return Builder.CreateFNeg( - Builder.CreateCall(F, {X, Y, Builder.CreateFNeg(Z, "neg")}), - "neg"); - } + return Builder.CreateCall( + CGM.getIntrinsic(Intrinsic::ppc_fnmsub, ResultType), {X, Y, Z}); + } llvm_unreachable("Unknown FMA operation"); return nullptr; // Suppress no-return warning } diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -310,6 +310,8 @@ for (; J != AnnotatedLines.begin(); --J) if ((*J)->Level < TheLine->Level) break; + if ((*J)->Level >= TheLine->Level) + return false; // Check if the found line starts a record. 
const FormatToken *LastNonComment = (*J)->Last; diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -105,7 +105,6 @@ void Parser::ParseAttributes(unsigned WhichAttrKinds, ParsedAttributesWithRange &Attrs, - SourceLocation *End, LateParsedAttrList *LateAttrs) { bool MoreToParse; do { @@ -113,11 +112,11 @@ // parsed, loop to ensure all specified attribute combinations are parsed. MoreToParse = false; if (WhichAttrKinds & PAKM_CXX11) - MoreToParse |= MaybeParseCXX11Attributes(Attrs, End); + MoreToParse |= MaybeParseCXX11Attributes(Attrs); if (WhichAttrKinds & PAKM_GNU) - MoreToParse |= MaybeParseGNUAttributes(Attrs, End, LateAttrs); + MoreToParse |= MaybeParseGNUAttributes(Attrs, LateAttrs); if (WhichAttrKinds & PAKM_Declspec) - MoreToParse |= MaybeParseMicrosoftDeclSpecs(Attrs, End); + MoreToParse |= MaybeParseMicrosoftDeclSpecs(Attrs); } while (MoreToParse); } @@ -163,14 +162,11 @@ /// /// We follow the C++ model, but don't allow junk after the identifier. 
void Parser::ParseGNUAttributes(ParsedAttributesWithRange &Attrs, - SourceLocation *EndLoc, LateParsedAttrList *LateAttrs, Declarator *D) { assert(Tok.is(tok::kw___attribute) && "Not a GNU attribute list!"); - SourceLocation StartLoc = Tok.getLocation(), Loc; - - if (!EndLoc) - EndLoc = &Loc; + SourceLocation StartLoc = Tok.getLocation(); + SourceLocation EndLoc = StartLoc; while (Tok.is(tok::kw___attribute)) { SourceLocation AttrTokLoc = ConsumeToken(); @@ -214,7 +210,7 @@ // Handle "parameterized" attributes if (!LateAttrs || !isAttributeLateParsed(*AttrName)) { - ParseGNUAttributeArgs(AttrName, AttrNameLoc, Attrs, EndLoc, nullptr, + ParseGNUAttributeArgs(AttrName, AttrNameLoc, Attrs, &EndLoc, nullptr, SourceLocation(), ParsedAttr::AS_GNU, D); continue; } @@ -247,8 +243,7 @@ SourceLocation Loc = Tok.getLocation(); if (ExpectAndConsume(tok::r_paren)) SkipUntil(tok::r_paren, StopAtSemi); - if (EndLoc) - *EndLoc = Loc; + EndLoc = Loc; // If this was declared in a macro, attach the macro IdentifierInfo to the // parsed attribute. @@ -270,7 +265,7 @@ } } - Attrs.Range = SourceRange(StartLoc, *EndLoc); + Attrs.Range = SourceRange(StartLoc, EndLoc); } /// Determine whether the given attribute has an identifier argument. 
@@ -750,11 +745,13 @@ /// [MS] extended-decl-modifier-seq: /// extended-decl-modifier[opt] /// extended-decl-modifier extended-decl-modifier-seq -void Parser::ParseMicrosoftDeclSpecs(ParsedAttributes &Attrs, - SourceLocation *End) { +void Parser::ParseMicrosoftDeclSpecs(ParsedAttributesWithRange &Attrs) { assert(getLangOpts().DeclSpecKeyword && "__declspec keyword is not enabled"); assert(Tok.is(tok::kw___declspec) && "Not a declspec!"); + SourceLocation StartLoc = Tok.getLocation(); + SourceLocation EndLoc = StartLoc; + while (Tok.is(tok::kw___declspec)) { ConsumeToken(); BalancedDelimiterTracker T(*this, tok::l_paren); @@ -817,9 +814,10 @@ ParsedAttr::AS_Declspec); } T.consumeClose(); - if (End) - *End = T.getCloseLocation(); + EndLoc = T.getCloseLocation(); } + + Attrs.Range = SourceRange(StartLoc, EndLoc); } void Parser::ParseMicrosoftTypeAttributes(ParsedAttributes &attrs) { @@ -3663,8 +3661,7 @@ // Attributes support. case tok::kw___attribute: case tok::kw___declspec: - ParseAttributes(PAKM_GNU | PAKM_Declspec, DS.getAttributes(), nullptr, - LateAttrs); + ParseAttributes(PAKM_GNU | PAKM_Declspec, DS.getAttributes(), LateAttrs); continue; // Microsoft single token adornments. 
diff --git a/clang/lib/Parse/ParseDeclCXX.cpp b/clang/lib/Parse/ParseDeclCXX.cpp --- a/clang/lib/Parse/ParseDeclCXX.cpp +++ b/clang/lib/Parse/ParseDeclCXX.cpp @@ -4513,19 +4513,17 @@ /// /// attribute-specifier-seq: /// attribute-specifier-seq[opt] attribute-specifier -void Parser::ParseCXX11Attributes(ParsedAttributesWithRange &attrs, - SourceLocation *endLoc) { +void Parser::ParseCXX11Attributes(ParsedAttributesWithRange &attrs) { assert(standardAttributesAllowed()); - SourceLocation StartLoc = Tok.getLocation(), Loc; - if (!endLoc) - endLoc = &Loc; + SourceLocation StartLoc = Tok.getLocation(); + SourceLocation EndLoc = StartLoc; do { - ParseCXX11AttributeSpecifier(attrs, endLoc); + ParseCXX11AttributeSpecifier(attrs, &EndLoc); } while (isCXX11AttributeSpecifier()); - attrs.Range = SourceRange(StartLoc, *endLoc); + attrs.Range = SourceRange(StartLoc, EndLoc); } void Parser::DiagnoseAndSkipCXX11Attributes() { @@ -4658,10 +4656,11 @@ /// [MS] ms-attribute-seq: /// ms-attribute[opt] /// ms-attribute ms-attribute-seq -void Parser::ParseMicrosoftAttributes(ParsedAttributes &attrs, - SourceLocation *endLoc) { +void Parser::ParseMicrosoftAttributes(ParsedAttributesWithRange &Attrs) { assert(Tok.is(tok::l_square) && "Not a Microsoft attribute list"); + SourceLocation StartLoc = Tok.getLocation(); + SourceLocation EndLoc = StartLoc; do { // FIXME: If this is actually a C++11 attribute, parse it as one. 
BalancedDelimiterTracker T(*this, tok::l_square); @@ -4681,15 +4680,16 @@ if (Tok.isNot(tok::identifier)) // ']', but also eof break; if (Tok.getIdentifierInfo()->getName() == "uuid") - ParseMicrosoftUuidAttributeArgs(attrs); + ParseMicrosoftUuidAttributeArgs(Attrs); else ConsumeToken(); } T.consumeClose(); - if (endLoc) - *endLoc = T.getCloseLocation(); + EndLoc = T.getCloseLocation(); } while (Tok.is(tok::l_square)); + + Attrs.Range = SourceRange(StartLoc, EndLoc); } void Parser::ParseMicrosoftIfExistsClassDeclaration( diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -1252,7 +1252,7 @@ TemplateParameterDepthRAII CurTemplateDepthTracker(TemplateParameterDepth); Actions.PushLambdaScope(); - ParsedAttributes Attr(AttrFactory); + ParsedAttributesWithRange Attr(AttrFactory); if (getLangOpts().CUDA) { // In CUDA code, GNU attributes are allowed to appear immediately after the // "[...]", even if there is no "(...)" before the lambda body. @@ -1355,7 +1355,8 @@ DeclEndLoc = ESpecRange.getEnd(); // Parse attribute-specifier[opt]. - MaybeParseCXX11Attributes(Attr, &DeclEndLoc); + if (MaybeParseCXX11Attributes(Attr)) + DeclEndLoc = Attr.Range.getEnd(); // Parse OpenCL addr space attribute. if (Tok.isOneOf(tok::kw___private, tok::kw___global, tok::kw___local, diff --git a/clang/lib/Parse/ParsePragma.cpp b/clang/lib/Parse/ParsePragma.cpp --- a/clang/lib/Parse/ParsePragma.cpp +++ b/clang/lib/Parse/ParsePragma.cpp @@ -341,7 +341,7 @@ Token &FirstToken) override; /// A pool of attributes that were parsed in \#pragma clang attribute. 
- ParsedAttributes AttributesForPragmaAttribute; + ParsedAttributesWithRange AttributesForPragmaAttribute; }; struct PragmaMaxTokensHereHandler : public PragmaHandler { @@ -1365,12 +1365,13 @@ namespace { struct PragmaAttributeInfo { enum ActionType { Push, Pop, Attribute }; - ParsedAttributes &Attributes; + ParsedAttributesWithRange &Attributes; ActionType Action; const IdentifierInfo *Namespace = nullptr; ArrayRef Tokens; - PragmaAttributeInfo(ParsedAttributes &Attributes) : Attributes(Attributes) {} + PragmaAttributeInfo(ParsedAttributesWithRange &Attributes) + : Attributes(Attributes) {} }; #include "clang/Parse/AttrSubMatchRulesParserStringSwitches.inc" @@ -1640,7 +1641,7 @@ /*IsReinject=*/false); ConsumeAnnotationToken(); - ParsedAttributes &Attrs = Info->Attributes; + ParsedAttributesWithRange &Attrs = Info->Attributes; Attrs.clearListOnly(); auto SkipToEnd = [this]() { diff --git a/clang/lib/Parse/ParseStmt.cpp b/clang/lib/Parse/ParseStmt.cpp --- a/clang/lib/Parse/ParseStmt.cpp +++ b/clang/lib/Parse/ParseStmt.cpp @@ -106,7 +106,7 @@ // at the start of the statement. Thus, we're not using MaybeParseAttributes // here because we don't want to allow arbitrary orderings. ParsedAttributesWithRange Attrs(AttrFactory); - MaybeParseCXX11Attributes(Attrs, nullptr, /*MightBeObjCMessageSend*/ true); + MaybeParseCXX11Attributes(Attrs, /*MightBeObjCMessageSend*/ true); if (getLangOpts().OpenCL) MaybeParseGNUAttributes(Attrs); @@ -1119,8 +1119,7 @@ ConsumeToken(); ParsedAttributesWithRange attrs(AttrFactory); - MaybeParseCXX11Attributes(attrs, nullptr, - /*MightBeObjCMessageSend*/ true); + MaybeParseCXX11Attributes(attrs, /*MightBeObjCMessageSend*/ true); // If this is the start of a declaration, parse it as such. 
if (isDeclarationStatement()) { diff --git a/clang/lib/Parse/ParseTentative.cpp b/clang/lib/Parse/ParseTentative.cpp --- a/clang/lib/Parse/ParseTentative.cpp +++ b/clang/lib/Parse/ParseTentative.cpp @@ -1913,7 +1913,7 @@ /*OuterMightBeMessageSend*/true)) return TPResult::True; - ParsedAttributes attrs(AttrFactory); + ParsedAttributesWithRange attrs(AttrFactory); MaybeParseMicrosoftAttributes(attrs); // decl-specifier-seq diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -164,8 +164,8 @@ std::set GetAllModuleMaps(const HeaderSearch &HS, Module *RootModule) { std::set ModuleMaps{}; - std::set ProcessedModules; - SmallVector ModulesToProcess{RootModule}; + std::set ProcessedModules; + SmallVector ModulesToProcess{RootModule}; SmallVector FilesByUID; HS.getFileMgr().GetUniqueIDMapping(FilesByUID); @@ -209,6 +209,11 @@ } ModulesToProcess.push_back(ImportedModule); } + + for (const Module *UndeclaredModule : CurrentModule->UndeclaredUses) + if (UndeclaredModule && + ProcessedModules.find(UndeclaredModule) == ProcessedModules.end()) + ModulesToProcess.push_back(UndeclaredModule); } return ModuleMaps; @@ -2861,6 +2866,8 @@ // Might be unnecessary as use declarations are only used to build the // module itself. + // TODO: Consider serializing undeclared uses of modules. + // Emit the link libraries. 
for (const auto &LL : Mod->LinkLibraries) { RecordData::value_type Record[] = {SUBMODULE_LINK_LIBRARY, diff --git a/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp --- a/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/DeadStoresChecker.cpp @@ -107,11 +107,8 @@ dyn_cast(Ex->IgnoreParenCasts()); if (!BO) break; - if (BO->getOpcode() == BO_Assign) { - Ex = BO->getRHS(); - continue; - } - if (BO->getOpcode() == BO_Comma) { + BinaryOperatorKind Op = BO->getOpcode(); + if (Op == BO_Assign || Op == BO_Comma) { Ex = BO->getRHS(); continue; } diff --git a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp --- a/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -580,7 +580,9 @@ {{"fgetln"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{"fgets"}, TR::Prop({{2}}, {{0, ReturnValueIndex}})}, {{"fscanf"}, TR::Prop({{0}}, {{}, 2})}, + {{"fscanf_s"}, TR::Prop({{0}}, {{}, {2}})}, {{"sscanf"}, TR::Prop({{0}}, {{}, 2})}, + {{"getc"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{"getc_unlocked"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{"getdelim"}, TR::Prop({{3}}, {{0}})}, @@ -592,6 +594,78 @@ {{"strrchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{"tolower"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, {{"toupper"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"fread"}, TR::Prop({{3}}, {{0, ReturnValueIndex}})}, + {{"recv"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + {{"recvfrom"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + + {{"ttyname"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"ttyname_r"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + + {{"basename"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"dirname"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"fnmatch"}, TR::Prop({{1}}, {{ReturnValueIndex}})}, + {{"memchr"}, TR::Prop({{0}}, 
{{ReturnValueIndex}})}, + {{"memrchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"rawmemchr"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + + {{"mbtowc"}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, + {{"wctomb"}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, + {{"wcwidth"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + + {{"memcmp"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{"memcpy"}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, + {{"memmove"}, TR::Prop({{1}}, {{0, ReturnValueIndex}})}, + // If memmem was called with a tainted needle and the search was + // successful, that would mean that the value pointed by the return value + // has the same content as the needle. If we choose to go by the policy of + // content equivalence implies taintedness equivalence, that would mean + // haystack should be considered a propagation source argument. + {{"memmem"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + + // The comment for memmem above also applies to strstr. + {{"strstr"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"strcasestr"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + + {{"strchrnul"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + + {{"index"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"rindex"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + + // FIXME: In case of arrays, only the first element of the array gets + // tainted. 
+ {{"qsort"}, TR::Prop({{0}}, {{0}})}, + {{"qsort_r"}, TR::Prop({{0}}, {{0}})}, + + {{"strcmp"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{"strcasecmp"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{"strncmp"}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})}, + {{"strncasecmp"}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})}, + {{"strspn"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{"strcspn"}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})}, + {{"strpbrk"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"strndup"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"strndupa"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"strlen"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"strnlen"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"strtol"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + {{"strtoll"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + {{"strtoul"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + {{"strtoull"}, TR::Prop({{0}}, {{1, ReturnValueIndex}})}, + + {{"isalnum"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"isalpha"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"isascii"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"isblank"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"iscntrl"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"isdigit"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"isgraph"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"islower"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"isprint"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"ispunct"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"isspace"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"isupper"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{"isxdigit"}, TR::Prop({{0}}, {{ReturnValueIndex}})}, + {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncat)}}, TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})}, {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrlcpy)}}, @@ -927,7 +1001,6 @@ } /// Checker registration - void ento::registerGenericTaintChecker(CheckerManager &Mgr) { Mgr.registerChecker(); } diff --git 
a/clang/lib/Tooling/Syntax/Pseudo/CMakeLists.txt b/clang/lib/Tooling/Syntax/Pseudo/CMakeLists.txt --- a/clang/lib/Tooling/Syntax/Pseudo/CMakeLists.txt +++ b/clang/lib/Tooling/Syntax/Pseudo/CMakeLists.txt @@ -1,13 +1,13 @@ set(LLVM_LINK_COMPONENTS Support) add_clang_library(clangToolingSyntaxPseudo + DirectiveMap.cpp Grammar.cpp GrammarBNF.cpp Lex.cpp LRGraph.cpp LRTable.cpp LRTableBuild.cpp - Preprocess.cpp Token.cpp LINK_LIBS diff --git a/clang/lib/Tooling/Syntax/Pseudo/Preprocess.cpp b/clang/lib/Tooling/Syntax/Pseudo/DirectiveMap.cpp rename from clang/lib/Tooling/Syntax/Pseudo/Preprocess.cpp rename to clang/lib/Tooling/Syntax/Pseudo/DirectiveMap.cpp --- a/clang/lib/Tooling/Syntax/Pseudo/Preprocess.cpp +++ b/clang/lib/Tooling/Syntax/Pseudo/DirectiveMap.cpp @@ -1,4 +1,4 @@ -//===--- Preprocess.cpp - Preprocess token streams ------------------------===// +//===--- DirectiveMap.cpp - Find and strip preprocessor directives --------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/Tooling/Syntax/Pseudo/Preprocess.h" +#include "clang/Tooling/Syntax/Pseudo/DirectiveMap.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Basic/TokenKinds.h" #include "llvm/Support/FormatVariadic.h" @@ -16,10 +16,11 @@ namespace pseudo { namespace { -class PPParser { +class DirectiveParser { public: - explicit PPParser(const TokenStream &Code) : Code(Code), Tok(&Code.front()) {} - void parse(PPStructure *Result) { parse(Result, /*TopLevel=*/true); } + explicit DirectiveParser(const TokenStream &Code) + : Code(Code), Tok(&Code.front()) {} + void parse(DirectiveMap *Result) { parse(Result, /*TopLevel=*/true); } private: // Roles that a directive might take within a conditional block. @@ -42,10 +43,11 @@ } } - // Parses tokens starting at Tok into PP. 
- // If we reach an End or Else directive that ends PP, returns it. + // Parses tokens starting at Tok into Map. + // If we reach an End or Else directive that ends Map, returns it. // If TopLevel is true, then we do not expect End and always return None. - llvm::Optional parse(PPStructure *PP, bool TopLevel) { + llvm::Optional parse(DirectiveMap *Map, + bool TopLevel) { auto StartsDirective = [&, AllowDirectiveAt((const Token *)nullptr)]() mutable { if (Tok->flag(LexFlags::StartsPPLine)) { @@ -65,29 +67,29 @@ do ++Tok; while (Tok->Kind != tok::eof && !StartsDirective()); - PP->Chunks.push_back(PPStructure::Code{ + Map->Chunks.push_back(DirectiveMap::Code{ Token::Range{Code.index(*Start), Code.index(*Tok)}}); continue; } // We have some kind of directive. - PPStructure::Directive Directive; + DirectiveMap::Directive Directive; parseDirective(&Directive); Cond Kind = classifyDirective(Directive.Kind); if (Kind == Cond::If) { // #if or similar, starting a nested conditional block. - PPStructure::Conditional Conditional; + DirectiveMap::Conditional Conditional; Conditional.Branches.emplace_back(); Conditional.Branches.back().first = std::move(Directive); parseConditional(&Conditional); - PP->Chunks.push_back(std::move(Conditional)); + Map->Chunks.push_back(std::move(Conditional)); } else if ((Kind == Cond::Else || Kind == Cond::End) && !TopLevel) { - // #endif or similar, ending this PPStructure scope. + // #endif or similar, ending this DirectiveMap scope. // (#endif is unexpected at the top level, treat as simple directive). return std::move(Directive); } else { // #define or similar, a simple directive at the current scope. - PP->Chunks.push_back(std::move(Directive)); + Map->Chunks.push_back(std::move(Directive)); } } return None; @@ -95,7 +97,7 @@ // Parse the rest of a conditional section, after seeing the If directive. // Returns after consuming the End directive. 
- void parseConditional(PPStructure::Conditional *C) { + void parseConditional(DirectiveMap::Conditional *C) { assert(C->Branches.size() == 1 && C->Branches.front().second.Chunks.empty() && "Should be ready to parse first branch body"); @@ -118,7 +120,7 @@ } // Parse a directive. Tok is the hash. - void parseDirective(PPStructure::Directive *D) { + void parseDirective(DirectiveMap::Directive *D) { assert(Tok->Kind == tok::hash); // Directive spans from the hash until the end of line or file. @@ -142,25 +144,26 @@ } // namespace -PPStructure PPStructure::parse(const TokenStream &Code) { - PPStructure Result; - PPParser(Code).parse(&Result); +DirectiveMap DirectiveMap::parse(const TokenStream &Code) { + DirectiveMap Result; + DirectiveParser(Code).parse(&Result); return Result; } -static void dump(llvm::raw_ostream &OS, const PPStructure &, unsigned Indent); -static void dump(llvm::raw_ostream &OS, const PPStructure::Directive &Directive, - unsigned Indent) { +static void dump(llvm::raw_ostream &OS, const DirectiveMap &, unsigned Indent); +static void dump(llvm::raw_ostream &OS, + const DirectiveMap::Directive &Directive, unsigned Indent) { OS.indent(Indent) << llvm::formatv("#{0} ({1} tokens)\n", tok::getPPKeywordSpelling(Directive.Kind), Directive.Tokens.size()); } -static void dump(llvm::raw_ostream &OS, const PPStructure::Code &Code, +static void dump(llvm::raw_ostream &OS, const DirectiveMap::Code &Code, unsigned Indent) { OS.indent(Indent) << llvm::formatv("code ({0} tokens)\n", Code.Tokens.size()); } static void dump(llvm::raw_ostream &OS, - const PPStructure::Conditional &Conditional, unsigned Indent) { + const DirectiveMap::Conditional &Conditional, + unsigned Indent) { for (const auto &Branch : Conditional.Branches) { dump(OS, Branch.first, Indent); dump(OS, Branch.second, Indent + 2); @@ -168,23 +171,23 @@ dump(OS, Conditional.End, Indent); } -static void dump(llvm::raw_ostream &OS, const PPStructure::Chunk &Chunk, +static void dump(llvm::raw_ostream &OS, 
const DirectiveMap::Chunk &Chunk, unsigned Indent) { switch (Chunk.kind()) { - case PPStructure::Chunk::K_Empty: + case DirectiveMap::Chunk::K_Empty: llvm_unreachable("invalid chunk"); - case PPStructure::Chunk::K_Code: - return dump(OS, (const PPStructure::Code &)Chunk, Indent); - case PPStructure::Chunk::K_Directive: - return dump(OS, (const PPStructure::Directive &)Chunk, Indent); - case PPStructure::Chunk::K_Conditional: - return dump(OS, (const PPStructure::Conditional &)Chunk, Indent); + case DirectiveMap::Chunk::K_Code: + return dump(OS, (const DirectiveMap::Code &)Chunk, Indent); + case DirectiveMap::Chunk::K_Directive: + return dump(OS, (const DirectiveMap::Directive &)Chunk, Indent); + case DirectiveMap::Chunk::K_Conditional: + return dump(OS, (const DirectiveMap::Conditional &)Chunk, Indent); } } -static void dump(llvm::raw_ostream &OS, const PPStructure &PP, +static void dump(llvm::raw_ostream &OS, const DirectiveMap &Map, unsigned Indent) { - for (const auto &Chunk : PP.Chunks) + for (const auto &Chunk : Map.Chunks) dump(OS, Chunk, Indent); } @@ -194,11 +197,11 @@ dump(OS, T, 0); \ return OS; \ } -OSTREAM_DUMP(PPStructure) -OSTREAM_DUMP(PPStructure::Chunk) -OSTREAM_DUMP(PPStructure::Directive) -OSTREAM_DUMP(PPStructure::Conditional) -OSTREAM_DUMP(PPStructure::Code) +OSTREAM_DUMP(DirectiveMap) +OSTREAM_DUMP(DirectiveMap::Chunk) +OSTREAM_DUMP(DirectiveMap::Directive) +OSTREAM_DUMP(DirectiveMap::Conditional) +OSTREAM_DUMP(DirectiveMap::Code) #undef OSTREAM_DUMP } // namespace pseudo diff --git a/clang/lib/Tooling/Syntax/Pseudo/README.md b/clang/lib/Tooling/Syntax/Pseudo/README.md new file mode 100644 --- /dev/null +++ b/clang/lib/Tooling/Syntax/Pseudo/README.md @@ -0,0 +1,37 @@ +# clang pseudoparser + +This directory implements an approximate heuristic parser for C++, based on the +clang lexer, the C++ grammar, and the GLR parsing algorithm. + +It parses a file in isolation, without reading its included headers. 
+The result is a strict syntactic tree whose structure follows the C++ grammar. +There is no semantic analysis, apart from guesses to disambiguate the parse. +Disambiguation can optionally be guided by an AST or a symbol index. + +For now, the best reference on intended scope is the [design proposal], +with further discussion on the [RFC]. + +## Dependencies between pseudoparser and clang + +Dependencies are limited because they don't make sense, but also to avoid +placing a burden on clang maintainers. + +The pseudoparser reuses the clang lexer (clangLex and clangBasic libraries) but +not the higher-level libraries (Parse, Sema, AST, Frontend...). + +When the pseudoparser should be used together with an AST (e.g. to guide +disambiguation), this is a separate "bridge" library that depends on both. + +Clang does not depend on the pseudoparser at all. If this seems useful in future +it should be discussed by RFC. + +## Parity between pseudoparser and clang + +The pseudoparser aims to understand real-world code, and particularly the +languages and extensions supported by Clang. + +However we don't try to keep these in lockstep: there's no expectation that +Clang parser changes are accompanied by pseudoparser changes or vice versa. 
+ +[design proposal]: https://docs.google.com/document/d/1eGkTOsFja63wsv8v0vd5JdoTonj-NlN3ujGF0T7xDbM/edit +[RFC]: https://discourse.llvm.org/t/rfc-a-c-pseudo-parser-for-tooling/59217/49 diff --git a/clang/test/Analysis/taint-generic.c b/clang/test/Analysis/taint-generic.c --- a/clang/test/Analysis/taint-generic.c +++ b/clang/test/Analysis/taint-generic.c @@ -1,20 +1,26 @@ -// RUN: %clang_analyze_cc1 -Wno-format-security -Wno-pointer-to-int-cast -verify %s \ +// RUN: %clang_analyze_cc1 -Wno-format-security -Wno-pointer-to-int-cast \ +// RUN: -Wno-incompatible-library-redeclaration -verify %s \ // RUN: -analyzer-checker=alpha.security.taint \ // RUN: -analyzer-checker=core \ // RUN: -analyzer-checker=alpha.security.ArrayBoundV2 \ +// RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config \ // RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml -// RUN: %clang_analyze_cc1 -Wno-format-security -Wno-pointer-to-int-cast -verify %s \ +// RUN: %clang_analyze_cc1 -Wno-format-security -Wno-pointer-to-int-cast \ +// RUN: -Wno-incompatible-library-redeclaration -verify %s \ // RUN: -DFILE_IS_STRUCT \ // RUN: -analyzer-checker=alpha.security.taint \ // RUN: -analyzer-checker=core \ // RUN: -analyzer-checker=alpha.security.ArrayBoundV2 \ +// RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config \ // RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config.yaml -// RUN: not %clang_analyze_cc1 -Wno-pointer-to-int-cast -verify %s \ +// RUN: not %clang_analyze_cc1 -Wno-pointer-to-int-cast \ +// RUN: -Wno-incompatible-library-redeclaration -verify %s \ // RUN: -analyzer-checker=alpha.security.taint \ +// RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config \ // RUN: alpha.security.taint.TaintPropagation:Config=justguessit \ // RUN: 2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-FILE @@ -24,8 +30,10 @@ // CHECK-INVALID-FILE-SAME: that expects a valid filename instead of // 
CHECK-INVALID-FILE-SAME: 'justguessit' -// RUN: not %clang_analyze_cc1 -verify %s \ +// RUN: not %clang_analyze_cc1 -Wno-incompatible-library-redeclaration \ +// RUN: -verify %s \ // RUN: -analyzer-checker=alpha.security.taint \ +// RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config \ // RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config-ill-formed.yaml \ // RUN: 2>&1 | FileCheck -DMSG=%errc_EINVAL %s -check-prefix=CHECK-ILL-FORMED @@ -34,8 +42,10 @@ // CHECK-ILL-FORMED-SAME: 'alpha.security.taint.TaintPropagation:Config', // CHECK-ILL-FORMED-SAME: that expects a valid yaml file: [[MSG]] -// RUN: not %clang_analyze_cc1 -verify %s \ +// RUN: not %clang_analyze_cc1 -Wno-incompatible-library-redeclaration \ +// RUN: -verify %s \ // RUN: -analyzer-checker=alpha.security.taint \ +// RUN: -analyzer-checker=debug.ExprInspection \ // RUN: -analyzer-config \ // RUN: alpha.security.taint.TaintPropagation:Config=%S/Inputs/taint-generic-config-invalid-arg.yaml \ // RUN: 2>&1 | FileCheck %s -check-prefix=CHECK-INVALID-ARG @@ -46,6 +56,9 @@ // CHECK-INVALID-ARG-SAME: rules greater or equal to -1 typedef long long rsize_t; +void clang_analyzer_isTainted_char(char); +void clang_analyzer_isTainted_charp(char*); +void clang_analyzer_isTainted_int(int); int scanf(const char *restrict format, ...); char *gets(char *str); @@ -60,13 +73,18 @@ #endif #define bool _Bool +#define NULL (void*)0 char *getenv(const char *name); + +FILE *fopen(const char *name, const char *mode); + int fscanf(FILE *restrict stream, const char *restrict format, ...); int sprintf(char *str, const char *format, ...); void setproctitle(const char *fmt, ...); void setproctitle_init(int argc, char *argv[], char *envp[]); typedef __typeof(sizeof(int)) size_t; +typedef signed long long ssize_t; // Define string functions. Use builtin for some of them. They all default to // the processing in the taint checker. 
@@ -87,6 +105,13 @@ void *calloc(size_t nmemb, size_t size); void bcopy(void *s1, void *s2, size_t n); +typedef size_t socklen_t; + +struct sockaddr { + unsigned short sa_family; + char sa_data[14]; +}; + #define BUFSIZE 10 int Buffer[BUFSIZE]; @@ -388,7 +413,6 @@ return system(c); // expected-warning {{Untrusted data is passed to a system call}} } -typedef signed long long ssize_t; ssize_t readlink(const char *path, char *buf, size_t bufsiz); int testReadlink(char *path, char *buf, size_t bufsiz) { ssize_t s = readlink(path, buf, bufsiz); @@ -420,8 +444,6 @@ return system(name); // expected-warning {{Untrusted data is passed to a system call}} } -struct sockaddr; -typedef size_t socklen_t; int getnameinfo(const struct sockaddr *restrict addr, socklen_t addrlen, char *restrict host, socklen_t hostlen, char *restrict serv, socklen_t servlen, int flags); @@ -463,6 +485,503 @@ return system(buf); // expected-warning {{Untrusted data is passed to a system call}} } +int fscanf_s(FILE *stream, const char *format, ...); +void testFscanf_s(const char *fname, int *d) { + FILE *f = fopen(fname, "r"); + fscanf_s(f, "%d", d); + clang_analyzer_isTainted_int(*d); // expected-warning {{YES}} +} + +int fread(void *buffer, size_t size, size_t count, FILE *stream); +void testFread(const char *fname, int *buffer, size_t size, size_t count) { + FILE *f = fopen(fname, "r"); + size_t read = fread(buffer, size, count, f); + + clang_analyzer_isTainted_int(*buffer); // expected-warning {{YES}} + clang_analyzer_isTainted_int(read); // expected-warning {{YES}} +} + +ssize_t recv(int sockfd, void *buf, size_t len, int flags); +void testRecv(int *buf, size_t len, int flags) { + int fd; + scanf("%d", &fd); // fake a tainted a file descriptor + + size_t read = recv(fd, buf, len, flags); + clang_analyzer_isTainted_int(*buf); // expected-warning {{YES}} + clang_analyzer_isTainted_int(read); // expected-warning {{YES}} +} + +ssize_t recvfrom(int sockfd, void *restrict buf, size_t len, int flags, + 
struct sockaddr *restrict src_addr, + socklen_t *restrict addrlen); +void testRecvfrom(int *restrict buf, size_t len, int flags, + struct sockaddr *restrict src_addr, + socklen_t *restrict addrlen) { + int fd; + scanf("%d", &fd); // fake a tainted a file descriptor + + size_t read = recvfrom(fd, buf, len, flags, src_addr, addrlen); + clang_analyzer_isTainted_int(*buf); // expected-warning {{YES}} + clang_analyzer_isTainted_int(read); // expected-warning {{YES}} +} + +char *ttyname(int fd); +void testTtyname() { + int fd; + scanf("%d", &fd); // fake a tainted a file descriptor + + char *name = ttyname(fd); + clang_analyzer_isTainted_charp(name); // expected-warning {{YES}} +} + +int ttyname_r(int fd, char *buf, size_t buflen); +void testTtyname_r(char *buf, size_t buflen) { + int fd; + scanf("%d", &fd); // fake a tainted a file descriptor + + int result = ttyname_r(fd, buf, buflen); + clang_analyzer_isTainted_char(*buf); // expected-warning {{YES}} + clang_analyzer_isTainted_int(result); // expected-warning {{YES}} +} + +char *dirname(char *path); +void testDirname() { + char buf[10]; + scanf("%9s", buf); + + char *name = dirname(buf); + clang_analyzer_isTainted_charp(name); // expected-warning {{YES}} +} + +char *basename(char *path); +void testBasename() { + char buf[10]; + scanf("%9s", buf); + + char *name = basename(buf); + clang_analyzer_isTainted_charp(name); // expected-warning {{YES}} +} + +int fnmatch(const char *pattern, const char *string, int flags); +void testFnmatch(const char *pattern, int flags) { + char string[10]; + scanf("%9s", string); + + int result = fnmatch(pattern, string, flags); + clang_analyzer_isTainted_int(result); // expected-warning {{YES}} +} + +void *memchr(const void *s, int c, size_t n); +void testMemchr(int c, size_t n) { + char buf[10]; + scanf("%9s", buf); + + char *result = memchr(buf, c, n); + clang_analyzer_isTainted_charp(result); // expected-warning {{YES}} +} + +void *memrchr(const void *s, int c, size_t n); +void 
testMemrchr(int c, size_t n) { + char buf[10]; + scanf("%9s", buf); + + char *result = memrchr(buf, c, n); + clang_analyzer_isTainted_charp(result); // expected-warning {{YES}} +} + +void *rawmemchr(const void *s, int c); +void testRawmemchr(int c) { + char buf[10]; + scanf("%9s", buf); + + char *result = rawmemchr(buf, c); + clang_analyzer_isTainted_charp(result); // expected-warning {{YES}} +} + +typedef char wchar_t; +int mbtowc(wchar_t *pwc, const char *s, size_t n); +void testMbtowc(wchar_t *pwc, size_t n) { + char buf[10]; + scanf("%9s", buf); + + int result = mbtowc(pwc, buf, n); + clang_analyzer_isTainted_char(*pwc); // expected-warning {{YES}} + clang_analyzer_isTainted_int(result); // expected-warning {{YES}} +} + +int wctomb(char *s, wchar_t wc); +void testWctomb(char *buf) { + wchar_t wc; + scanf("%c", &wc); + + int result = wctomb(buf, wc); + clang_analyzer_isTainted_char(*buf); // expected-warning {{YES}} + clang_analyzer_isTainted_int(result); // expected-warning {{YES}} +} + +int wcwidth(wchar_t c); +void testWcwidth() { + wchar_t wc; + scanf("%c", &wc); + + int width = wcwidth(wc); + clang_analyzer_isTainted_int(width); // expected-warning {{YES}} +} + +int memcmp(const void *s1, const void *s2, size_t n); +void testMemcmpWithLHSTainted(size_t n, char *rhs) { + char lhs[10]; + scanf("%9s", lhs); + + int cmp_result = memcmp(lhs, rhs, n); + clang_analyzer_isTainted_int(cmp_result); // expected-warning {{YES}} +} + +void testMemcmpWithRHSTainted(size_t n, char *lhs) { + char rhs[10]; + scanf("%9s", rhs); + + int cmp_result = memcmp(lhs, rhs, n); + clang_analyzer_isTainted_int(cmp_result); // expected-warning {{YES}} +} + +void *memcpy(void *restrict dest, const void *restrict src, size_t n); +void testMemcpy(char *dst, size_t n) { + char src[10]; + scanf("%9s", src); + + char *result = memcpy(dst, src, n); + + clang_analyzer_isTainted_char(*dst); // expected-warning {{YES}} + clang_analyzer_isTainted_charp(result); // expected-warning {{YES}} +} + 
+void *memmove(void *dest, const void *src, size_t n); +void testMemmove(char *dst, size_t n) { + char src[10]; + scanf("%9s", src); + + char *result = memmove(dst, src, n); + + clang_analyzer_isTainted_char(*dst); // expected-warning {{YES}} + clang_analyzer_isTainted_charp(result); // expected-warning {{YES}} +} + +void *memmem(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen); +void testMemmem(const void *needle, size_t needlelen) { + char haystack[10]; + scanf("%9s", haystack); + + char *result = memmem(haystack, 9, needle, needlelen); + clang_analyzer_isTainted_charp(result); // expected-warning {{YES}} +} + +char *strstr(const char *haystack, const char *needle); +void testStrstr(const char *needle) { + char haystack[10]; + scanf("%9s", haystack); + + char *result = strstr(haystack, needle); + clang_analyzer_isTainted_charp(result); // expected-warning {{YES}} +} + +char *strcasestr(const char *haystack, const char *needle); +void testStrcasestr(const char *needle) { + char haystack[10]; + scanf("%9s", haystack); + + char *result = strcasestr(haystack, needle); + clang_analyzer_isTainted_charp(result); // expected-warning {{YES}} +} + +char *strchrnul(const char *s, int c); +void testStrchrnul() { + char s[10]; + scanf("%9s", s); + + char *result = strchrnul(s, 9); + clang_analyzer_isTainted_charp(result); // expected-warning {{YES}} +} + +char *index(const char *s, int c); +void testIndex() { + char s[10]; + scanf("%9s", s); + + char *result = index(s, 9); + clang_analyzer_isTainted_charp(result); // expected-warning {{YES}} +} + +char *rindex(const char *s, int c); +void testRindex() { + char s[10]; + scanf("%9s", s); + + char *result = rindex(s, 9); + clang_analyzer_isTainted_charp(result); // expected-warning {{YES}} +} + +int strcmp(const char *s1, const char *s2); +void testStrcmpWithLHSTainted(char *rhs) { + char lhs[10]; + scanf("%9s", lhs); + + int cmp_result = strcmp(lhs, rhs); + 
clang_analyzer_isTainted_int(cmp_result); // expected-warning {{YES}} +} + +void testStrcmpWithRHSTainted(char *lhs) { + char rhs[10]; + scanf("%9s", rhs); + + int cmp_result = strcmp(lhs, rhs); + clang_analyzer_isTainted_int(cmp_result); // expected-warning {{YES}} +} +int strcasecmp(const char *s1, const char *s2); +void testStrcasecmpWithLHSTainted(char *rhs) { + char lhs[10]; + scanf("%9s", lhs); + + int cmp_result = strcasecmp(lhs, rhs); + clang_analyzer_isTainted_int(cmp_result); // expected-warning {{YES}} +} + +void testStrcasecmpWithRHSTainted(char *lhs) { + char rhs[10]; + scanf("%9s", rhs); + + int cmp_result = strcasecmp(lhs, rhs); + clang_analyzer_isTainted_int(cmp_result); // expected-warning {{YES}} +} +int strncmp(const char *s1, const char *s2, size_t n); +void testStrncmpWithLHSTainted(char *rhs, size_t n) { + char lhs[10]; + scanf("%9s", lhs); + + int cmp_result = strncmp(lhs, rhs, n); + clang_analyzer_isTainted_int(cmp_result); // expected-warning {{YES}} +} + +void testStrncmpWithRHSTainted(char *lhs, size_t n) { + char rhs[10]; + scanf("%9s", rhs); + + int cmp_result = strncmp(lhs, rhs, n); + clang_analyzer_isTainted_int(cmp_result); // expected-warning {{YES}} +} + +void testStrncmpWithNTainted(char *lhs, char *rhs) { + int n; + scanf("%d", &n); + + int cmp_result = strncmp(lhs, rhs, n); + clang_analyzer_isTainted_int(cmp_result); // expected-warning {{YES}} +} + +int strncasecmp(const char *s1, const char *s2, size_t n); +void testStrncasecmpWithLHSTainted(char *rhs, size_t n) { + char lhs[10]; + scanf("%9s", lhs); + + int cmp_result = strncmp(lhs, rhs, n); + clang_analyzer_isTainted_int(cmp_result); // expected-warning {{YES}} +} + +void testStrncasecmpWithRHSTainted(char *lhs, size_t n) { + char rhs[10]; + scanf("%9s", rhs); + + int cmp_result = strncmp(lhs, rhs, n); + clang_analyzer_isTainted_int(cmp_result); // expected-warning {{YES}} +} + +void testStrncasecmpWithNTainted(char *lhs, char *rhs) { + int n; + scanf("%d", &n); + + int 
cmp_result = strncmp(lhs, rhs, n); + clang_analyzer_isTainted_int(cmp_result); // expected-warning {{YES}} +} + +size_t strspn(const char *s, const char *accept); +void testStrspnFirstArgTainted(const char *accept) { + char s[10]; + scanf("%9s", s); + + size_t result = strspn(s, accept); + clang_analyzer_isTainted_int(result); // expected-warning {{YES}} +} + +void testStrspnSecondArgTainted(const char *s) { + char accept[10]; + scanf("%9s", accept); + + size_t result = strspn(s, accept); + clang_analyzer_isTainted_int(result); // expected-warning {{YES}} +} + +size_t strcspn(const char *s, const char *reject); +void testStrcspnFirstArgTainted(const char *reject) { + char s[10]; + scanf("%9s", s); + + size_t result = strcspn(s, reject); + clang_analyzer_isTainted_int(result); // expected-warning {{YES}} +} + +void testStrcspnSecondArgTainted(const char *s) { + char reject[10]; + scanf("%9s", reject); + + size_t result = strcspn(s, reject); + clang_analyzer_isTainted_int(result); // expected-warning {{YES}} +} + +char *strpbrk(const char *s, const char *accept); +void testStrpbrk(const char *accept) { + char s[10]; + scanf("%9s", s); + + char *result = strpbrk(s, accept); + clang_analyzer_isTainted_charp(result); // expected-warning {{YES}} +} + +char *strndup(const char *s, size_t n); +void testStrndup(size_t n) { + char s[10]; + scanf("%9s", s); + + char *result = strndup(s, n); + clang_analyzer_isTainted_charp(result); // expected-warning {{YES}} +} + +char *strdupa(const char *s); +void testStrdupa() { + char s[10]; + scanf("%9s", s); + + char *result = strdupa(s); + clang_analyzer_isTainted_charp(result); // expected-warning {{YES}} +} + +char *strndupa(const char *s, size_t n); +void testStrndupa(size_t n) { + char s[10]; + scanf("%9s", s); + + char *result = strndupa(s, n); + clang_analyzer_isTainted_charp(result); // expected-warning {{YES}} +} + +size_t strlen(const char *s); +void testStrlen() { + char s[10]; + scanf("%9s", s); + + size_t result = 
strlen(s); + clang_analyzer_isTainted_int(result); // expected-warning {{YES}} +} + +size_t strnlen(const char *s, size_t maxlen); +void testStrnlen(size_t maxlen) { + char s[10]; + scanf("%9s", s); + + size_t result = strnlen(s, maxlen); + clang_analyzer_isTainted_int(result); // expected-warning {{YES}} +} + +long strtol(const char *restrict nptr, char **restrict endptr, int base); +long long strtoll(const char *restrict nptr, char **restrict endptr, int base); +unsigned long int strtoul(const char *nptr, char **endptr, int base); +unsigned long long int strtoull(const char *nptr, char **endptr, int base); +void testStrtolVariants(char **restrict endptr, int base) { + char s[10]; + scanf("%9s", s); + + long result_l = strtol(s, endptr, base); + clang_analyzer_isTainted_int(result_l); // expected-warning {{YES}} + + long long result_ll = strtoll(s, endptr, base); + clang_analyzer_isTainted_int(result_ll); // expected-warning {{YES}} + + unsigned long result_ul = strtoul(s, endptr, base); + clang_analyzer_isTainted_int(result_ul); // expected-warning {{YES}} + + unsigned long long result_ull = strtoull(s, endptr, base); + clang_analyzer_isTainted_int(result_ull); // expected-warning {{YES}} +} + +int isalnum(int c); +int isalpha(int c); +int isascii(int c); +int isblank(int c); +int iscntrl(int c); +int isdigit(int c); +int isgraph(int c); +int islower(int c); +int isprint(int c); +int ispunct(int c); +int isspace(int c); +int isupper(int c); +int isxdigit(int c); + +void testIsFunctions() { + char c; + scanf("%c", &c); + + int alnum = isalnum(c); + clang_analyzer_isTainted_int(alnum); // expected-warning {{YES}} + + int alpha = isalpha(c); + clang_analyzer_isTainted_int(alpha); // expected-warning {{YES}} + + int ascii = isascii(c); + clang_analyzer_isTainted_int(ascii); // expected-warning {{YES}} + + int blank = isblank(c); + clang_analyzer_isTainted_int(blank); // expected-warning {{YES}} + + int cntrl = iscntrl(c); + clang_analyzer_isTainted_int(cntrl); // 
expected-warning {{YES}} + + int digit = isdigit(c); + clang_analyzer_isTainted_int(digit); // expected-warning {{YES}} + + int graph = isgraph(c); + clang_analyzer_isTainted_int(graph); // expected-warning {{YES}} + + int lower = islower(c); + clang_analyzer_isTainted_int(lower); // expected-warning {{YES}} + + int print = isprint(c); + clang_analyzer_isTainted_int(print); // expected-warning {{YES}} + + int punct = ispunct(c); + clang_analyzer_isTainted_int(punct); // expected-warning {{YES}} + + int space = isspace(c); + clang_analyzer_isTainted_int(space); // expected-warning {{YES}} + + int upper = isupper(c); + clang_analyzer_isTainted_int(upper); // expected-warning {{YES}} + + int xdigit = isxdigit(c); + clang_analyzer_isTainted_int(xdigit); // expected-warning {{YES}} +} + +void qsort(void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *)); +void qsort_r(void *base, size_t nmemb, size_t size, int (*compar)(const void *, const void *, void *), void *arg); +void testQsort() { + int data[1]; + scanf("%d", data); + + qsort(data, sizeof(data), sizeof(data[0]), NULL); + clang_analyzer_isTainted_int(data[0]); // expected-warning {{YES}} + qsort_r(data, sizeof(data), sizeof(data[0]), NULL, NULL); + clang_analyzer_isTainted_int(data[0]); // expected-warning {{YES}} +} + // Test configuration int mySource1(void); void mySource2(int*); diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-fma.c b/clang/test/CodeGen/PowerPC/builtins-ppc-fma.c --- a/clang/test/CodeGen/PowerPC/builtins-ppc-fma.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-fma.c @@ -32,12 +32,8 @@ // CHECK: <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[RESULT]]) vf = __builtin_vsx_xvnmsubasp(vf, vf, vf); - // CHECK: [[RESULT:%[^ ]+]] = fneg <4 x float> %{{.*}} - // CHECK: [[RESULT2:%[^ ]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[RESULT]]) - // CHECK: fneg <4 x float> [[RESULT2]] + // 
CHECK: call <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) vd = __builtin_vsx_xvnmsubadp(vd, vd, vd); - // CHECK: [[RESULT:%[^ ]+]] = fneg <2 x double> %{{.*}} - // CHECK: [[RESULT2:%[^ ]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[RESULT]]) - // CHECK: fneg <2 x double> [[RESULT2]] + // CHECK: call <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) } diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-fpconstrained.c b/clang/test/CodeGen/PowerPC/builtins-ppc-fpconstrained.c --- a/clang/test/CodeGen/PowerPC/builtins-ppc-fpconstrained.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-fpconstrained.c @@ -142,9 +142,7 @@ vf = __builtin_vsx_xvnmsubasp(vf, vf, vf); // CHECK-LABEL: try-xvnmsubasp - // CHECK-UNCONSTRAINED: [[RESULT0:%[^ ]+]] = fneg <4 x float> %{{.*}} - // CHECK-UNCONSTRAINED: [[RESULT1:%[^ ]+]] = call <4 x float> @llvm.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[RESULT0]]) - // CHECK-UNCONSTRAINED: fneg <4 x float> [[RESULT1]] + // CHECK-UNCONSTRAINED: call <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}) // CHECK-CONSTRAINED: [[RESULT0:%[^ ]+]] = fneg <4 x float> %{{.*}} // CHECK-CONSTRAINED: [[RESULT1:%[^ ]+]] = call <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> [[RESULT0]], metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-CONSTRAINED: fneg <4 x float> [[RESULT1]] @@ -152,9 +150,7 @@ vd = __builtin_vsx_xvnmsubadp(vd, vd, vd); // CHECK-LABEL: try-xvnmsubadp - // CHECK-UNCONSTRAINED: [[RESULT0:%[^ ]+]] = fneg <2 x double> %{{.*}} - // CHECK-UNCONSTRAINED: [[RESULT1:%[^ ]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[RESULT0]]) - // CHECK-UNCONSTRAINED: fneg <2 x double> [[RESULT1]] + // 
CHECK-UNCONSTRAINED: call <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}}) // CHECK-CONSTRAINED: [[RESULT0:%[^ ]+]] = fneg <2 x double> %{{.*}} // CHECK-CONSTRAINED: [[RESULT1:%[^ ]+]] = call <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x double> [[RESULT0]], metadata !"round.tonearest", metadata !"fpexcept.strict") // CHECK-CONSTRAINED: fneg <2 x double> [[RESULT1]] diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c b/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c --- a/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-vsx.c @@ -894,20 +894,12 @@ // CHECK-LE-NEXT: fneg <2 x double> %[[FM]] res_vf = vec_nmsub(vf, vf, vf); -// CHECK: fneg <4 x float> %{{[0-9]+}} -// CHECK-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> -// CHECK: fneg <4 x float> %{{[0-9]+}} -// CHECK-LE: fneg <4 x float> %{{[0-9]+}} -// CHECK-LE-NEXT: call <4 x float> @llvm.fma.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> -// CHECK-LE: fneg <4 x float> %{{[0-9]+}} +// CHECK: call <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> +// CHECK-LE: call <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float> %{{[0-9]+}}, <4 x float> %{{[0-9]+}}, <4 x float> res_vd = vec_nmsub(vd, vd, vd); -// CHECK: fneg <2 x double> %{{[0-9]+}} -// CHECK-NEXT: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> -// CHECK-NEXT: fneg <2 x double> %[[FM]] -// CHECK-LE: fneg <2 x double> %{{[0-9]+}} -// CHECK-LE-NEXT: [[FM:[0-9]+]] = call <2 x double> @llvm.fma.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> -// CHECK-LE-NEXT: fneg <2 x double> %[[FM]] +// CHECK: [[FM:[0-9]+]] = call <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 
x double> +// CHECK-LE: [[FM:[0-9]+]] = call <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double> %{{[0-9]+}}, <2 x double> %{{[0-9]+}}, <2 x double> /* vec_nor */ res_vsll = vec_nor(vsll, vsll); diff --git a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.c b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.c --- a/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.c +++ b/clang/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.c @@ -95,10 +95,11 @@ // CHECK-LABEL: @fnmsub( // CHECK: [[D_ADDR:%.*]] = alloca double, align 8 // CHECK-NEXT: store double [[D:%.*]], double* [[D_ADDR]], align 8 +// CHECK-COUNT-3: load double, double* [[D_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load double, double* [[D_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load double, double* [[D_ADDR]], align 8 // CHECK-NEXT: [[TMP2:%.*]] = load double, double* [[D_ADDR]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.ppc.fnmsub(double [[TMP0]], double [[TMP1]], double [[TMP2]]) +// CHECK-NEXT: [[TMP3:%.*]] = call double @llvm.ppc.fnmsub.f64(double [[TMP0]], double [[TMP1]], double [[TMP2]]) // CHECK-NEXT: ret double [[TMP3]] // double fnmsub (double d) { @@ -108,10 +109,11 @@ // CHECK-LABEL: @fnmsubs( // CHECK: [[F_ADDR:%.*]] = alloca float, align 4 // CHECK-NEXT: store float [[F:%.*]], float* [[F_ADDR]], align 4 +// CHECK-COUNT-3: load float, float* [[F_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[F_ADDR]], align 4 // CHECK-NEXT: [[TMP1:%.*]] = load float, float* [[F_ADDR]], align 4 // CHECK-NEXT: [[TMP2:%.*]] = load float, float* [[F_ADDR]], align 4 -// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.ppc.fnmsubs(float [[TMP0]], float [[TMP1]], float [[TMP2]]) +// CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.ppc.fnmsub.f32(float [[TMP0]], float [[TMP1]], float [[TMP2]]) // CHECK-NEXT: ret float [[TMP3]] // float fnmsubs (float f) { diff --git a/clang/test/Modules/Inputs/AddRemoveIrrelevantModuleMap/a.modulemap 
b/clang/test/Modules/Inputs/AddRemoveIrrelevantModuleMap/a.modulemap deleted file mode 100644 --- a/clang/test/Modules/Inputs/AddRemoveIrrelevantModuleMap/a.modulemap +++ /dev/null @@ -1 +0,0 @@ -module a { } diff --git a/clang/test/Modules/Inputs/AddRemoveIrrelevantModuleMap/b.modulemap b/clang/test/Modules/Inputs/AddRemoveIrrelevantModuleMap/b.modulemap deleted file mode 100644 --- a/clang/test/Modules/Inputs/AddRemoveIrrelevantModuleMap/b.modulemap +++ /dev/null @@ -1 +0,0 @@ -module b { } diff --git a/clang/test/Modules/add-remove-irrelevant-module-map.m b/clang/test/Modules/add-remove-irrelevant-module-map.m --- a/clang/test/Modules/add-remove-irrelevant-module-map.m +++ b/clang/test/Modules/add-remove-irrelevant-module-map.m @@ -1,16 +1,58 @@ -// RUN: rm -rf %t -// RUN: rm -rf %t.mcp -// RUN: mkdir -p %t +// RUN: rm -rf %t && mkdir %t +// RUN: split-file %s %t -// Build without b.modulemap -// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t.mcp -fdisable-module-hash -fmodule-map-file=%S/Inputs/AddRemoveIrrelevantModuleMap/a.modulemap %s -verify -// RUN: cp %t.mcp/a.pcm %t/a.pcm +//--- a.modulemap +module a {} -// Build with b.modulemap -// RUN: rm -rf %t.mcp -// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t.mcp -fdisable-module-hash -fmodule-map-file=%S/Inputs/AddRemoveIrrelevantModuleMap/a.modulemap -fmodule-map-file=%S/Inputs/AddRemoveIrrelevantModuleMap/b.modulemap %s -verify -// RUN: not diff %t.mcp/a.pcm %t/a.pcm +//--- b.modulemap +module b {} +//--- test-simple.m // expected-no-diagnostics - @import a; + +// Build without b.modulemap: +// +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/cache -fdisable-module-hash \ +// RUN: -fmodule-map-file=%t/a.modulemap %t/test-simple.m -verify +// RUN: mv %t/cache %t/cache-without-b + +// Build with b.modulemap: +// +// RUN: %clang_cc1 -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/cache -fdisable-module-hash \ +// RUN: 
-fmodule-map-file=%t/a.modulemap -fmodule-map-file=%t/b.modulemap %t/test-simple.m -verify +// RUN: mv %t/cache %t/cache-with-b + +// Neither PCM file considers 'b.modulemap' an input: +// +// RUN: %clang_cc1 -module-file-info %t/cache-without-b/a.pcm | FileCheck %s --check-prefix=CHECK-B +// RUN: %clang_cc1 -module-file-info %t/cache-with-b/a.pcm | FileCheck %s --check-prefix=CHECK-B +// CHECK-B-NOT: Input file: {{.*}}b.modulemap + +//--- c.modulemap +module c [no_undeclared_includes] { header "c.h" } + +//--- c.h +#if __has_include("d.h") // This should use 'd.modulemap' in order to determine that 'd.h' + // doesn't exist for 'c' because of its '[no_undeclared_includes]'. +#endif + +//--- d.modulemap +module d { header "d.h" } + +//--- d.h +// empty + +//--- test-no-undeclared-includes.m +// expected-no-diagnostics +@import c; + +// RUN: %clang_cc1 -fmodules -fmodules-cache-path=%t/cache -fdisable-module-hash \ +// RUN: -fmodule-map-file=%t/c.modulemap -fmodule-map-file=%t/d.modulemap \ +// RUN: %t/test-no-undeclared-includes.m -verify + +// The PCM file considers 'd.modulemap' an input because it affects the compilation, +// although it doesn't describe the built module or its imports. 
+// +// RUN: %clang_cc1 -module-file-info %t/cache/c.pcm | FileCheck %s --check-prefix=CHECK-D +// CHECK-D: Input file: {{.*}}d.modulemap diff --git a/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c --- a/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c +++ b/clang/test/OpenMP/irbuilder_nested_openmp_parallel_empty.c @@ -33,8 +33,7 @@ // ALL-LABEL: @_Z17nested_parallel_1Pfid( // ALL-NEXT: entry: -// ALL-NEXT: [[STRUCTARG14:%.*]] = alloca { { i32*, double*, float** }*, i32*, double*, float** }, align 8 -// ALL-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8 +// ALL-NEXT: [[STRUCTARG14:%.*]] = alloca { i32*, double*, float** }, align 8 // ALL-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 // ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // ALL-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 @@ -44,15 +43,13 @@ // ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // ALL-NEXT: br label [[OMP_PARALLEL:%.*]] // ALL: omp_parallel: -// ALL-NEXT: [[GEP_STRUCTARG:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 0 -// ALL-NEXT: store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG]], align 8 -// ALL-NEXT: [[GEP_A_ADDR15:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 1 +// ALL-NEXT: [[GEP_A_ADDR15:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 0 // ALL-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR15]], align 8 -// ALL-NEXT: [[GEP_B_ADDR16:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG14]], i32 0, 
i32 2 +// ALL-NEXT: [[GEP_B_ADDR16:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 1 // ALL-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR16]], align 8 -// ALL-NEXT: [[GEP_R_ADDR17:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 3 +// ALL-NEXT: [[GEP_R_ADDR17:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG14]], i32 0, i32 2 // ALL-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR17]], align 8 -// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { i32*, double*, float** }*, i32*, double*, float** }*)* @_Z17nested_parallel_1Pfid..omp_par.2 to void (i32*, i32*, ...)*), { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG14]]) +// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z17nested_parallel_1Pfid..omp_par.2 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG14]]) // ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT13:%.*]] // ALL: omp.par.outlined.exit13: // ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -71,9 +68,6 @@ // ALL-LABEL: @_Z17nested_parallel_2Pfid( // ALL-NEXT: entry: -// ALL-NEXT: [[STRUCTARG68:%.*]] = alloca { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, align 8 -// ALL-NEXT: [[STRUCTARG64:%.*]] = alloca { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }, align 8 -// ALL-NEXT: [[STRUCTARG59:%.*]] = alloca { i32*, double*, float** }, align 8 // ALL-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8 // ALL-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 // ALL-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 @@ -84,19 +78,13 @@ // ALL-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // ALL-NEXT: br label [[OMP_PARALLEL:%.*]] // ALL: omp_parallel: -// ALL-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 0 +// ALL-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 0 // ALL-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 -// ALL-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, 
float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 1 +// ALL-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 1 // ALL-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR]], align 8 -// ALL-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 2 +// ALL-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 2 // ALL-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8 -// ALL-NEXT: [[GEP_STRUCTARG64:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 3 -// ALL-NEXT: store { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG64]], { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }** [[GEP_STRUCTARG64]], align 8 -// ALL-NEXT: [[GEP_STRUCTARG69:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* 
}*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 4 -// ALL-NEXT: store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG69]], align 8 -// ALL-NEXT: [[GEP_STRUCTARG5970:%.*]] = getelementptr { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]], i32 0, i32 5 -// ALL-NEXT: store { i32*, double*, float** }* [[STRUCTARG59]], { i32*, double*, float** }** [[GEP_STRUCTARG5970]], align 8 -// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }*)* @_Z17nested_parallel_2Pfid..omp_par.5 to void (i32*, i32*, ...)*), { i32*, double*, float**, { i32*, double*, float**, { i32*, double*, float** }*, { i32*, double*, float** }* }*, { i32*, double*, float** }*, { i32*, double*, float** }* }* [[STRUCTARG68]]) +// ALL-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z17nested_parallel_2Pfid..omp_par.5 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG]]) // ALL-NEXT: br label [[OMP_PAR_OUTLINED_EXIT55:%.*]] // ALL: omp.par.outlined.exit55: // ALL-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] diff --git a/clang/test/OpenMP/irbuilder_nested_parallel_for.c b/clang/test/OpenMP/irbuilder_nested_parallel_for.c --- a/clang/test/OpenMP/irbuilder_nested_parallel_for.c +++ b/clang/test/OpenMP/irbuilder_nested_parallel_for.c @@ -44,8 +44,7 @@ // CHECK-LABEL: @_Z14parallel_for_1Pfid( // CHECK-NEXT: entry: -// CHECK-NEXT: [[STRUCTARG17:%.*]] = alloca { { i32*, double*, float** }*, i32*, double*, float** }, align 8 -// CHECK-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8 +// CHECK-NEXT: [[STRUCTARG17:%.*]] = alloca { i32*, double*, float** }, align 8 // CHECK-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 @@ -55,15 +54,13 @@ // CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK: omp_parallel: -// CHECK-NEXT: [[GEP_STRUCTARG:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 0 -// CHECK-NEXT: store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG]], align 8 -// CHECK-NEXT: [[GEP_A_ADDR18:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 1 +// CHECK-NEXT: [[GEP_A_ADDR18:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 0 // CHECK-NEXT: store 
i32* [[A_ADDR]], i32** [[GEP_A_ADDR18]], align 8 -// CHECK-NEXT: [[GEP_B_ADDR19:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 2 +// CHECK-NEXT: [[GEP_B_ADDR19:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 1 // CHECK-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR19]], align 8 -// CHECK-NEXT: [[GEP_R_ADDR20:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 3 +// CHECK-NEXT: [[GEP_R_ADDR20:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 2 // CHECK-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR20]], align 8 -// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { i32*, double*, float** }*, i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG17]]) // CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16:%.*]] // CHECK: omp.par.outlined.exit16: // CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -72,34 +69,31 @@ // // CHECK-DEBUG-LABEL: @_Z14parallel_for_1Pfid( // CHECK-DEBUG-NEXT: entry: -// CHECK-DEBUG-NEXT: [[STRUCTARG17:%.*]] = alloca { { i32*, double*, float** }*, i32*, double*, float** }, align 8 -// CHECK-DEBUG-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8 +// CHECK-DEBUG-NEXT: [[STRUCTARG17:%.*]] = alloca { i32*, double*, float** }, align 8 // CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 // CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[B_ADDR:%.*]] = alloca double, align 8 // CHECK-DEBUG-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META73:![0-9]+]], metadata !DIExpression()), !dbg [[DBG74:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META72:![0-9]+]], metadata !DIExpression()), !dbg [[DBG73:![0-9]+]] // CHECK-DEBUG-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META75:![0-9]+]], metadata !DIExpression()), !dbg [[DBG76:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META74:![0-9]+]], metadata !DIExpression()), !dbg [[DBG75:![0-9]+]] // CHECK-DEBUG-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META77:![0-9]+]], metadata !DIExpression()), !dbg [[DBG78:![0-9]+]] -// CHECK-DEBUG-NEXT: 
[[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]), !dbg [[DBG79:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META76:![0-9]+]], metadata !DIExpression()), !dbg [[DBG77:![0-9]+]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB6:[0-9]+]]), !dbg [[DBG78:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: -// CHECK-DEBUG-NEXT: [[GEP_STRUCTARG:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 0 -// CHECK-DEBUG-NEXT: store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_A_ADDR18:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 1 +// CHECK-DEBUG-NEXT: [[GEP_A_ADDR18:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 0 // CHECK-DEBUG-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR18]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_B_ADDR19:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 2 +// CHECK-DEBUG-NEXT: [[GEP_B_ADDR19:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 1 // CHECK-DEBUG-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR19]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_R_ADDR20:%.*]] = getelementptr { { i32*, double*, float** }*, i32*, double*, float** }, { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]], i32 0, i32 3 +// CHECK-DEBUG-NEXT: [[GEP_R_ADDR20:%.*]] = getelementptr { i32*, double*, float** }, { i32*, 
double*, float** }* [[STRUCTARG17]], i32 0, i32 2 // CHECK-DEBUG-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR20]], align 8 -// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB6]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { i32*, double*, float** }*, i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { { i32*, double*, float** }*, i32*, double*, float** }* [[STRUCTARG17]]), !dbg [[DBG80:![0-9]+]] +// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB6]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_1Pfid..omp_par.4 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG17]]), !dbg [[DBG79:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT16:%.*]] // CHECK-DEBUG: omp.par.outlined.exit16: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit.split: -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG82:![0-9]+]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG81:![0-9]+]] // void parallel_for_1(float *r, int a, double b) { #pragma omp parallel @@ -116,9 +110,6 @@ // CHECK-LABEL: @_Z14parallel_for_2Pfid( // CHECK-NEXT: entry: -// CHECK-NEXT: [[STRUCTARG218:%.*]] = alloca { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, align 8 -// CHECK-NEXT: [[STRUCTARG214:%.*]] = alloca { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, align 8 -// CHECK-NEXT: [[STRUCTARG209:%.*]] = alloca { i32*, double*, float** }, align 8 // CHECK-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8 // CHECK-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 // CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 @@ 
-137,19 +128,13 @@ // CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK: omp_parallel: -// CHECK-NEXT: [[GEP_STRUCTARG214:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 0 -// CHECK-NEXT: store { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG214]], { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }** [[GEP_STRUCTARG214]], align 8 -// CHECK-NEXT: [[GEP_STRUCTARG219:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 1 -// CHECK-NEXT: store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG219]], align 8 -// CHECK-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 2 +// CHECK-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 0 // 
CHECK-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 -// CHECK-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 3 +// CHECK-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 1 // CHECK-NEXT: store double* [[B_ADDR]], double** [[GEP_B_ADDR]], align 8 -// CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 4 +// CHECK-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 2 // CHECK-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8 -// CHECK-NEXT: [[GEP_STRUCTARG209220:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 5 -// CHECK-NEXT: store { i32*, double*, float** }* [[STRUCTARG209]], { i32*, double*, float** }** [[GEP_STRUCTARG209220]], align 8 -// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]]) +// CHECK-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG]]) // CHECK-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184:%.*]] // CHECK: omp.par.outlined.exit184: // CHECK-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] @@ -205,9 +190,6 @@ // // CHECK-DEBUG-LABEL: @_Z14parallel_for_2Pfid( // CHECK-DEBUG-NEXT: entry: -// CHECK-DEBUG-NEXT: [[STRUCTARG218:%.*]] = alloca { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, align 8 -// CHECK-DEBUG-NEXT: [[STRUCTARG214:%.*]] = alloca { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, align 8 -// CHECK-DEBUG-NEXT: [[STRUCTARG209:%.*]] = alloca { i32*, double*, float** }, align 8 // CHECK-DEBUG-NEXT: [[STRUCTARG:%.*]] = alloca { i32*, double*, float** }, align 8 // CHECK-DEBUG-NEXT: [[R_ADDR:%.*]] = alloca float*, align 8 // CHECK-DEBUG-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 @@ -221,80 +203,74 @@ // CHECK-DEBUG-NEXT: [[P_UPPERBOUND205:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: [[P_STRIDE206:%.*]] = alloca i32, align 4 // CHECK-DEBUG-NEXT: store float* [[R:%.*]], float** [[R_ADDR]], align 8 -// 
CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META134:![0-9]+]], metadata !DIExpression()), !dbg [[DBG135:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata float** [[R_ADDR]], metadata [[META133:![0-9]+]], metadata !DIExpression()), !dbg [[DBG134:![0-9]+]] // CHECK-DEBUG-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META136:![0-9]+]], metadata !DIExpression()), !dbg [[DBG137:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[A_ADDR]], metadata [[META135:![0-9]+]], metadata !DIExpression()), !dbg [[DBG136:![0-9]+]] // CHECK-DEBUG-NEXT: store double [[B:%.*]], double* [[B_ADDR]], align 8 -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META138:![0-9]+]], metadata !DIExpression()), !dbg [[DBG139:![0-9]+]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB13:[0-9]+]]), !dbg [[DBG140:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata double* [[B_ADDR]], metadata [[META137:![0-9]+]], metadata !DIExpression()), !dbg [[DBG138:![0-9]+]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB13:[0-9]+]]), !dbg [[DBG139:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PARALLEL:%.*]] // CHECK-DEBUG: omp_parallel: -// CHECK-DEBUG-NEXT: [[GEP_STRUCTARG214:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 0 -// CHECK-DEBUG-NEXT: store { { i32*, double*, float** }*, i32*, double*, float**, { 
i32*, double*, float** }* }* [[STRUCTARG214]], { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }** [[GEP_STRUCTARG214]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_STRUCTARG219:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 1 -// CHECK-DEBUG-NEXT: store { i32*, double*, float** }* [[STRUCTARG]], { i32*, double*, float** }** [[GEP_STRUCTARG219]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 2 +// CHECK-DEBUG-NEXT: [[GEP_A_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 0 // CHECK-DEBUG-NEXT: store i32* [[A_ADDR]], i32** [[GEP_A_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 3 +// CHECK-DEBUG-NEXT: [[GEP_B_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 1 // CHECK-DEBUG-NEXT: store double* 
[[B_ADDR]], double** [[GEP_B_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 4 +// CHECK-DEBUG-NEXT: [[GEP_R_ADDR:%.*]] = getelementptr { i32*, double*, float** }, { i32*, double*, float** }* [[STRUCTARG]], i32 0, i32 2 // CHECK-DEBUG-NEXT: store float** [[R_ADDR]], float*** [[GEP_R_ADDR]], align 8 -// CHECK-DEBUG-NEXT: [[GEP_STRUCTARG209220:%.*]] = getelementptr { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]], i32 0, i32 5 -// CHECK-DEBUG-NEXT: store { i32*, double*, float** }* [[STRUCTARG209]], { i32*, double*, float** }** [[GEP_STRUCTARG209220]], align 8 -// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB13]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), { { { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }*, { i32*, double*, float** }*, i32*, double*, float**, { i32*, double*, float** }* }* [[STRUCTARG218]]), !dbg [[DBG141:![0-9]+]] +// CHECK-DEBUG-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB13]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i32*, double*, float** }*)* @_Z14parallel_for_2Pfid..omp_par.23 to void (i32*, i32*, ...)*), { i32*, double*, float** }* [[STRUCTARG]]), !dbg [[DBG140:![0-9]+]] // CHECK-DEBUG-NEXT: br label [[OMP_PAR_OUTLINED_EXIT184:%.*]] // CHECK-DEBUG: omp.par.outlined.exit184: // CHECK-DEBUG-NEXT: br label [[OMP_PAR_EXIT_SPLIT:%.*]] // CHECK-DEBUG: omp.par.exit.split: -// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[I185]], metadata [[META145:![0-9]+]], metadata !DIExpression()), !dbg [[DBG148:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 0, i32* [[I185]], align 4, !dbg [[DBG148]] -// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG149:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32* [[I185]], i32** [[TMP0]], align 8, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, i32* [[I185]], align 4, !dbg [[DBG150:![0-9]+]] -// CHECK-DEBUG-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR188]], 
%struct.anon.17* [[AGG_CAPTURED186]]), !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[DOTCOUNT189:%.*]] = load i32, i32* [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: call void @llvm.dbg.declare(metadata i32* [[I185]], metadata [[META144:![0-9]+]], metadata !DIExpression()), !dbg [[DBG147:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 0, i32* [[I185]], align 4, !dbg [[DBG147]] +// CHECK-DEBUG-NEXT: [[TMP0:%.*]] = getelementptr inbounds [[STRUCT_ANON_17]], %struct.anon.17* [[AGG_CAPTURED186]], i32 0, i32 0, !dbg [[DBG148:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32* [[I185]], i32** [[TMP0]], align 8, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_ANON_18]], %struct.anon.18* [[AGG_CAPTURED187]], i32 0, i32 0, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP2:%.*]] = load i32, i32* [[I185]], align 4, !dbg [[DBG149:![0-9]+]] +// CHECK-DEBUG-NEXT: store i32 [[TMP2]], i32* [[TMP1]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.19(i32* [[DOTCOUNT_ADDR188]], %struct.anon.17* [[AGG_CAPTURED186]]), !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[DOTCOUNT189:%.*]] = load i32, i32* [[DOTCOUNT_ADDR188]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_PREHEADER190:%.*]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.preheader190: -// CHECK-DEBUG-NEXT: store i32 0, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: store i32 [[TMP3]], i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: store i32 1, i32* [[P_STRIDE206]], align 4, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42:[0-9]+]]), !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB42]], i32 
[[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 0), !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: store i32 0, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP3:%.*]] = sub i32 [[DOTCOUNT189]], 1, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: store i32 [[TMP3]], i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: store i32 1, i32* [[P_STRIDE206]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM207:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42:[0-9]+]]), !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]], i32 34, i32* [[P_LASTITER203]], i32* [[P_LOWERBOUND204]], i32* [[P_UPPERBOUND205]], i32* [[P_STRIDE206]], i32 1, i32 0), !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP4:%.*]] = load i32, i32* [[P_LOWERBOUND204]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP5:%.*]] = load i32, i32* [[P_UPPERBOUND205]], align 4, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP6:%.*]] = sub i32 [[TMP5]], [[TMP4]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 1, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191:%.*]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.header191: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: br label 
[[OMP_LOOP_COND192:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_IV197:%.*]] = phi i32 [ 0, [[OMP_LOOP_PREHEADER190]] ], [ [[OMP_LOOP_NEXT199:%.*]], [[OMP_LOOP_INC194:%.*]] ], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_COND192:%.*]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.cond192: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_CMP198:%.*]] = icmp ult i32 [[OMP_LOOP_IV197]], [[TMP7]], !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: br i1 [[OMP_LOOP_CMP198]], label [[OMP_LOOP_BODY193:%.*]], label [[OMP_LOOP_EXIT195:%.*]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.body193: -// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG151:![0-9]+]] -// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(i32* [[I185]], i32 [[TMP8]], %struct.anon.18* [[AGG_CAPTURED187]]), !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG152:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG152]] -// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, double* [[B_ADDR]], align 8, !dbg [[DBG151]] -// CHECK-DEBUG-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG153:![0-9]+]] -// CHECK-DEBUG-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG152]] -// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load float*, float** [[R_ADDR]], align 8, !dbg [[DBG154:![0-9]+]] -// CHECK-DEBUG-NEXT: store float [[CONV202]], float* [[TMP11]], align 4, !dbg [[DBG155:![0-9]+]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC194]], !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[TMP8:%.*]] = add i32 [[OMP_LOOP_IV197]], [[TMP4]], !dbg [[DBG150:![0-9]+]] +// CHECK-DEBUG-NEXT: call void @__captured_stmt.20(i32* [[I185]], i32 [[TMP8]], %struct.anon.18* 
[[AGG_CAPTURED187]]), !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !dbg [[DBG151:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV200:%.*]] = sitofp i32 [[TMP9]] to double, !dbg [[DBG151]] +// CHECK-DEBUG-NEXT: [[TMP10:%.*]] = load double, double* [[B_ADDR]], align 8, !dbg [[DBG150]] +// CHECK-DEBUG-NEXT: [[ADD201:%.*]] = fadd double [[CONV200]], [[TMP10]], !dbg [[DBG152:![0-9]+]] +// CHECK-DEBUG-NEXT: [[CONV202:%.*]] = fptrunc double [[ADD201]] to float, !dbg [[DBG151]] +// CHECK-DEBUG-NEXT: [[TMP11:%.*]] = load float*, float** [[R_ADDR]], align 8, !dbg [[DBG153:![0-9]+]] +// CHECK-DEBUG-NEXT: store float [[CONV202]], float* [[TMP11]], align 4, !dbg [[DBG154:![0-9]+]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_INC194]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.inc194: -// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191]], !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: [[OMP_LOOP_NEXT199]] = add nuw i32 [[OMP_LOOP_IV197]], 1, !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_HEADER191]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.exit195: -// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG149]] -// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42]]), !dbg [[DBG151]] -// CHECK-DEBUG-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG151]] -// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG149]] +// CHECK-DEBUG-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB42]], i32 [[OMP_GLOBAL_THREAD_NUM207]]), !dbg [[DBG148]] +// CHECK-DEBUG-NEXT: [[OMP_GLOBAL_THREAD_NUM208:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB42]]), !dbg [[DBG150]] +// CHECK-DEBUG-NEXT: call void 
@__kmpc_barrier(%struct.ident_t* @[[GLOB43:[0-9]+]], i32 [[OMP_GLOBAL_THREAD_NUM208]]), !dbg [[DBG150]] +// CHECK-DEBUG-NEXT: br label [[OMP_LOOP_AFTER196:%.*]], !dbg [[DBG148]] // CHECK-DEBUG: omp_loop.after196: -// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG156:![0-9]+]] +// CHECK-DEBUG-NEXT: ret void, !dbg [[DBG155:![0-9]+]] // void parallel_for_2(float *r, int a, double b) { #pragma omp parallel diff --git a/clang/test/Syntax/lex.c b/clang/test/Syntax/lex.c --- a/clang/test/Syntax/lex.c +++ b/clang/test/Syntax/lex.c @@ -39,7 +39,7 @@ TOKEN-NEXT: raw_identifier 5:0 "endif" TOKEN-NEXT: r_brace 6:0 "}" flags=1 -RUN: clang-pseudo -source %s -print-pp-structure | FileCheck %s -check-prefix=PPS --strict-whitespace +RUN: clang-pseudo -source %s -print-directive-map | FileCheck %s -check-prefix=PPS --strict-whitespace PPS: code (5 tokens) PPS-NEXT: #ifndef (3 tokens) PPS-NEXT: code (4 tokens) diff --git a/clang/tools/clang-pseudo/ClangPseudo.cpp b/clang/tools/clang-pseudo/ClangPseudo.cpp --- a/clang/tools/clang-pseudo/ClangPseudo.cpp +++ b/clang/tools/clang-pseudo/ClangPseudo.cpp @@ -7,10 +7,10 @@ //===----------------------------------------------------------------------===// #include "clang/Basic/LangOptions.h" +#include "clang/Tooling/Syntax/Pseudo/DirectiveMap.h" #include "clang/Tooling/Syntax/Pseudo/Grammar.h" #include "clang/Tooling/Syntax/Pseudo/LRGraph.h" #include "clang/Tooling/Syntax/Pseudo/LRTable.h" -#include "clang/Tooling/Syntax/Pseudo/Preprocess.h" #include "clang/Tooling/Syntax/Pseudo/Token.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" @@ -33,8 +33,8 @@ static opt PrintSource("print-source", desc("Print token stream")); static opt PrintTokens("print-tokens", desc("Print detailed token info")); static opt - PrintPPStructure("print-pp-structure", - desc("Print directive structure of source code")); + PrintDirectiveMap("print-directive-map", + desc("Print directive structure of source code")); static std::string 
readOrDie(llvm::StringRef Path) { llvm::ErrorOr> Text = @@ -76,9 +76,9 @@ std::string Text = readOrDie(Source); clang::LangOptions LangOpts; // FIXME: use real options. auto Stream = clang::syntax::pseudo::lex(Text, LangOpts); - auto Structure = clang::syntax::pseudo::PPStructure::parse(Stream); + auto Structure = clang::syntax::pseudo::DirectiveMap::parse(Stream); - if (PrintPPStructure) + if (PrintDirectiveMap) llvm::outs() << Structure; if (PrintSource) Stream.print(llvm::outs()); diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -12669,6 +12669,13 @@ "};", MergeInlineOnly); verifyFormat("int f() {}", MergeInlineOnly); + // https://llvm.org/PR54147 + verifyFormat("auto lambda = []() {\n" + " // comment\n" + " f();\n" + " g();\n" + "};", + MergeInlineOnly); // Also verify behavior when BraceWrapping.AfterFunction = true MergeInlineOnly.BreakBeforeBraces = FormatStyle::BS_Custom; diff --git a/clang/unittests/Tooling/LookupTest.cpp b/clang/unittests/Tooling/LookupTest.cpp --- a/clang/unittests/Tooling/LookupTest.cpp +++ b/clang/unittests/Tooling/LookupTest.cpp @@ -8,12 +8,15 @@ #include "clang/Tooling/Refactoring/Lookup.h" #include "TestVisitor.h" +#include "clang/AST/TypeLoc.h" +#include "clang/Basic/SourceLocation.h" using namespace clang; namespace { struct GetDeclsVisitor : TestVisitor { std::function OnCall; std::function OnRecordTypeLoc; + std::function OnUsingTypeLoc; SmallVector DeclStack; bool VisitCallExpr(CallExpr *Expr) { @@ -28,6 +31,12 @@ return true; } + bool VisitUsingTypeLoc(UsingTypeLoc Loc) { + if (OnUsingTypeLoc) + OnUsingTypeLoc(Loc); + return true; + } + bool TraverseDecl(Decl *D) { DeclStack.push_back(D); bool Ret = TestVisitor::TraverseDecl(D); @@ -181,19 +190,19 @@ TEST(LookupTest, replaceNestedClassName) { GetDeclsVisitor Visitor; - auto replaceRecordTypeLoc = [&](RecordTypeLoc TLoc, - StringRef 
ReplacementString) { - const auto *FD = cast(TLoc.getDecl()); + auto replaceTypeLoc = [&](const NamedDecl *ND, SourceLocation Loc, + StringRef ReplacementString) { return tooling::replaceNestedName( - nullptr, TLoc.getBeginLoc(), Visitor.DeclStack.back()->getDeclContext(), - FD, ReplacementString); + nullptr, Loc, Visitor.DeclStack.back()->getDeclContext(), ND, + ReplacementString); }; Visitor.OnRecordTypeLoc = [&](RecordTypeLoc Type) { // Filter Types by name since there are other `RecordTypeLoc` in the test // file. if (Type.getDecl()->getQualifiedNameAsString() == "a::b::Foo") { - EXPECT_EQ("x::Bar", replaceRecordTypeLoc(Type, "::a::x::Bar")); + EXPECT_EQ("x::Bar", replaceTypeLoc(Type.getDecl(), Type.getBeginLoc(), + "::a::x::Bar")); } }; Visitor.runOver("namespace a { namespace b {\n" @@ -201,12 +210,13 @@ "namespace c { Foo f();; }\n" "} }\n"); - Visitor.OnRecordTypeLoc = [&](RecordTypeLoc Type) { + Visitor.OnUsingTypeLoc = [&](UsingTypeLoc Type) { // Filter Types by name since there are other `RecordTypeLoc` in the test // file. // `a::b::Foo` in using shadow decl is not `TypeLoc`. - if (Type.getDecl()->getQualifiedNameAsString() == "a::b::Foo") { - EXPECT_EQ("Bar", replaceRecordTypeLoc(Type, "::a::x::Bar")); + auto *TD = Type.getFoundDecl()->getTargetDecl(); + if (TD->getQualifiedNameAsString() == "a::b::Foo") { + EXPECT_EQ("Bar", replaceTypeLoc(TD, Type.getBeginLoc(), "::a::x::Bar")); } }; Visitor.runOver("namespace a { namespace b { class Foo {}; } }\n" @@ -218,7 +228,8 @@ // it's not visible at [0]. 
Visitor.OnRecordTypeLoc = [&](RecordTypeLoc Type) { if (Type.getDecl()->getQualifiedNameAsString() == "x::y::Old") { - EXPECT_EQ("Foo", replaceRecordTypeLoc(Type, "::x::Foo")); + EXPECT_EQ("Foo", + replaceTypeLoc(Type.getDecl(), Type.getBeginLoc(), "::x::Foo")); } }; Visitor.runOver(R"( diff --git a/clang/unittests/Tooling/Syntax/Pseudo/CMakeLists.txt b/clang/unittests/Tooling/Syntax/Pseudo/CMakeLists.txt --- a/clang/unittests/Tooling/Syntax/Pseudo/CMakeLists.txt +++ b/clang/unittests/Tooling/Syntax/Pseudo/CMakeLists.txt @@ -3,9 +3,9 @@ ) add_clang_unittest(ClangPseudoTests + DirectiveMapTest.cpp GrammarTest.cpp LRTableTest.cpp - PreprocessTest.cpp TokenTest.cpp ) diff --git a/clang/unittests/Tooling/Syntax/Pseudo/PreprocessTest.cpp b/clang/unittests/Tooling/Syntax/Pseudo/DirectiveMapTest.cpp rename from clang/unittests/Tooling/Syntax/Pseudo/PreprocessTest.cpp rename to clang/unittests/Tooling/Syntax/Pseudo/DirectiveMapTest.cpp --- a/clang/unittests/Tooling/Syntax/Pseudo/PreprocessTest.cpp +++ b/clang/unittests/Tooling/Syntax/Pseudo/DirectiveMapTest.cpp @@ -1,4 +1,4 @@ -//===--- TokenTest.cpp ----------------------------------------------------===// +//===--- DirectiveMapTest.cpp ---------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "clang/Tooling/Syntax/Pseudo/Preprocess.h" +#include "clang/Tooling/Syntax/Pseudo/DirectiveMap.h" #include "clang/Basic/LangOptions.h" #include "clang/Basic/TokenKinds.h" @@ -26,7 +26,7 @@ using testing::Matcher; using testing::Pair; using testing::StrEq; -using Chunk = PPStructure::Chunk; +using Chunk = DirectiveMap::Chunk; MATCHER_P2(tokensAre, TS, Tokens, "tokens are " + std::string(Tokens)) { std::vector Texts; @@ -38,7 +38,7 @@ MATCHER_P(chunkKind, K, "") { return arg.kind() == K; } -TEST(PPStructure, Parse) { +TEST(DirectiveMap, Parse) { LangOptions Opts; std::string Code = R"cpp( #include @@ -57,30 +57,30 @@ )cpp"; TokenStream S = cook(lex(Code, Opts), Opts); - PPStructure PP = PPStructure::parse(S); + DirectiveMap PP = DirectiveMap::parse(S); ASSERT_THAT(PP.Chunks, ElementsAre(chunkKind(Chunk::K_Directive), chunkKind(Chunk::K_Code), chunkKind(Chunk::K_Conditional), chunkKind(Chunk::K_Code))); - EXPECT_THAT((const PPStructure::Directive &)PP.Chunks[0], + EXPECT_THAT((const DirectiveMap::Directive &)PP.Chunks[0], tokensAre(S, "# include < foo . 
h >")); - EXPECT_THAT((const PPStructure::Code &)PP.Chunks[1], + EXPECT_THAT((const DirectiveMap::Code &)PP.Chunks[1], tokensAre(S, "int main ( ) {")); - EXPECT_THAT((const PPStructure::Code &)PP.Chunks[3], tokensAre(S, "}")); + EXPECT_THAT((const DirectiveMap::Code &)PP.Chunks[3], tokensAre(S, "}")); - const PPStructure::Conditional &Ifdef(PP.Chunks[2]); + const DirectiveMap::Conditional &Ifdef(PP.Chunks[2]); EXPECT_THAT(Ifdef.Branches, ElementsAre(Pair(tokensAre(S, "# ifdef HAS_FOO"), _), Pair(tokensAre(S, "# elif NEEDS_FOO"), _))); EXPECT_THAT(Ifdef.End, tokensAre(S, "# endif")); - const PPStructure &HasFoo(Ifdef.Branches[0].second); - const PPStructure &NeedsFoo(Ifdef.Branches[1].second); + const DirectiveMap &HasFoo(Ifdef.Branches[0].second); + const DirectiveMap &NeedsFoo(Ifdef.Branches[1].second); EXPECT_THAT(HasFoo.Chunks, ElementsAre(chunkKind(Chunk::K_Conditional))); - const PPStructure::Conditional &If(HasFoo.Chunks[0]); + const DirectiveMap::Conditional &If(HasFoo.Chunks[0]); EXPECT_THAT(If.Branches, ElementsAre(Pair(tokensAre(S, "# if HAS_BAR"), _), Pair(tokensAre(S, "# else"), _))); EXPECT_THAT(If.Branches[0].second.Chunks, @@ -89,12 +89,12 @@ ElementsAre(chunkKind(Chunk::K_Code))); EXPECT_THAT(NeedsFoo.Chunks, ElementsAre(chunkKind(Chunk::K_Directive))); - const PPStructure::Directive &Error(NeedsFoo.Chunks[0]); + const DirectiveMap::Directive &Error(NeedsFoo.Chunks[0]); EXPECT_THAT(Error, tokensAre(S, "# error missing_foo")); EXPECT_EQ(Error.Kind, tok::pp_error); } -TEST(PPStructure, ParseUgly) { +TEST(DirectiveMap, ParseUgly) { LangOptions Opts; std::string Code = R"cpp( /*A*/ # /*B*/ \ @@ -104,19 +104,19 @@ /*E*/ )cpp"; TokenStream S = cook(lex(Code, Opts), Opts); - PPStructure PP = PPStructure::parse(S); + DirectiveMap PP = DirectiveMap::parse(S); ASSERT_THAT(PP.Chunks, ElementsAre(chunkKind(Chunk::K_Code), chunkKind(Chunk::K_Directive), chunkKind(Chunk::K_Code))); - EXPECT_THAT((const PPStructure::Code &)PP.Chunks[0], tokensAre(S, "/*A*/")); - 
const PPStructure::Directive &Define(PP.Chunks[1]); + EXPECT_THAT((const DirectiveMap::Code &)PP.Chunks[0], tokensAre(S, "/*A*/")); + const DirectiveMap::Directive &Define(PP.Chunks[1]); EXPECT_EQ(Define.Kind, tok::pp_define); EXPECT_THAT(Define, tokensAre(S, "# /*B*/ /*C*/ define BAR /*D*/")); - EXPECT_THAT((const PPStructure::Code &)PP.Chunks[2], tokensAre(S, "/*E*/")); + EXPECT_THAT((const DirectiveMap::Code &)PP.Chunks[2], tokensAre(S, "/*E*/")); } -TEST(PPStructure, ParseBroken) { +TEST(DirectiveMap, ParseBroken) { LangOptions Opts; std::string Code = R"cpp( a @@ -125,17 +125,17 @@ b )cpp"; TokenStream S = cook(lex(Code, Opts), Opts); - PPStructure PP = PPStructure::parse(S); + DirectiveMap PP = DirectiveMap::parse(S); ASSERT_THAT(PP.Chunks, ElementsAre(chunkKind(Chunk::K_Code), chunkKind(Chunk::K_Directive), chunkKind(Chunk::K_Conditional))); - EXPECT_THAT((const PPStructure::Code &)PP.Chunks[0], tokensAre(S, "a")); - const PPStructure::Directive &Endif(PP.Chunks[1]); + EXPECT_THAT((const DirectiveMap::Code &)PP.Chunks[0], tokensAre(S, "a")); + const DirectiveMap::Directive &Endif(PP.Chunks[1]); EXPECT_EQ(Endif.Kind, tok::pp_endif); EXPECT_THAT(Endif, tokensAre(S, "# endif // mismatched")); - const PPStructure::Conditional &X(PP.Chunks[2]); + const DirectiveMap::Conditional &X(PP.Chunks[2]); EXPECT_EQ(1u, X.Branches.size()); // The (only) branch of the broken conditional section runs until eof. 
EXPECT_EQ(tok::pp_if, X.Branches.front().first.Kind); diff --git a/flang/tools/bbc/bbc.cpp b/flang/tools/bbc/bbc.cpp --- a/flang/tools/bbc/bbc.cpp +++ b/flang/tools/bbc/bbc.cpp @@ -41,7 +41,7 @@ #include "mlir/IR/AsmState.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/MLIRContext.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Pass/PassRegistry.h" diff --git a/flang/tools/fir-opt/fir-opt.cpp b/flang/tools/fir-opt/fir-opt.cpp --- a/flang/tools/fir-opt/fir-opt.cpp +++ b/flang/tools/fir-opt/fir-opt.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Support/MlirOptMain.h" +#include "mlir/Tools/mlir-opt/MlirOptMain.h" #include "flang/Optimizer/CodeGen/CodeGen.h" #include "flang/Optimizer/Support/InitFIR.h" #include "flang/Optimizer/Transforms/Passes.h" diff --git a/flang/tools/tco/tco.cpp b/flang/tools/tco/tco.cpp --- a/flang/tools/tco/tco.cpp +++ b/flang/tools/tco/tco.cpp @@ -20,7 +20,7 @@ #include "mlir/IR/AsmState.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/MLIRContext.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Transforms/Passes.h" @@ -79,7 +79,7 @@ mlir::MLIRContext context(registry); fir::support::loadDialects(context); fir::support::registerLLVMTranslation(context); - auto owningRef = mlir::parseSourceFile(sourceMgr, &context); + auto owningRef = mlir::parseSourceFile(sourceMgr, &context); if (!owningRef) { errs() << "Error can't load file " << inputFilename << '\n'; diff --git a/libc/loader/linux/x86_64/start.cpp b/libc/loader/linux/x86_64/start.cpp --- a/libc/loader/linux/x86_64/start.cpp +++ b/libc/loader/linux/x86_64/start.cpp @@ -38,7 +38,9 @@ return; // We will assume the alignment is always a power of two. 
- uintptr_t tlsSize = (app.tls.size + app.tls.align) & -app.tls.align; + uintptr_t tlsSize = app.tls.size & -app.tls.align; + if (tlsSize != app.tls.size) + tlsSize += app.tls.align; // Per the x86_64 TLS ABI, the entry pointed to by the thread pointer is the // address of the TLS block. So, we add more size to accomodate this address diff --git a/libcxx/docs/ReleaseNotes.rst b/libcxx/docs/ReleaseNotes.rst --- a/libcxx/docs/ReleaseNotes.rst +++ b/libcxx/docs/ReleaseNotes.rst @@ -39,7 +39,7 @@ ------------ - Implemented P0627R6 (Function to mark unreachable code) - - Implemented P1165R1 (Make stateful allocator propagation more consistent for operator+(basic_string)) + - Implemented P1165R1 (Make stateful allocator propagation more consistent for ``operator+(basic_string)``) API Changes ----------- @@ -49,6 +49,7 @@ they were not supposed to set ``_LIBCPP_ABI_UNSTABLE`` manually, however we still feel that it is worth mentioning in the release notes in case some users had been doing it. + - The header ``<experimental/filesystem>`` has been removed. Instead, use the ``<filesystem>`` header. The associated macro ``_LIBCPP_DEPRECATED_EXPERIMENTAL_FILESYSTEM`` has also been removed.
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -66,6 +66,7 @@ __algorithm/pop_heap.h __algorithm/prev_permutation.h __algorithm/push_heap.h + __algorithm/ranges_max_element.h __algorithm/ranges_min_element.h __algorithm/ranges_swap_ranges.h __algorithm/remove.h @@ -448,6 +449,7 @@ ctgmath ctime ctype.h + cuchar cwchar cwctype deque @@ -541,6 +543,7 @@ type_traits typeindex typeinfo + uchar.h unordered_map unordered_set utility diff --git a/libcxx/include/__algorithm/ranges_max_element.h b/libcxx/include/__algorithm/ranges_max_element.h new file mode 100644 --- /dev/null +++ b/libcxx/include/__algorithm/ranges_max_element.h @@ -0,0 +1,69 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___ALGORITHM_RANGES_MAX_ELEMENT_H +#define _LIBCPP___ALGORITHM_RANGES_MAX_ELEMENT_H + +#include <__config> +#include <__functional/identity.h> +#include <__functional/invoke.h> +#include <__functional/ranges_operations.h> +#include <__iterator/concepts.h> +#include <__iterator/projected.h> +#include <__ranges/access.h> +#include <__ranges/concepts.h> +#include <__ranges/dangling.h> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace ranges { +namespace __max_element { +struct __fn { + template + _LIBCPP_HIDE_FROM_ABI static constexpr _Ip __go(_Ip __first, _Sp __last, _Comp& __comp, _Proj& __proj) { + if (__first == __last) + return __first; + + _Ip __i = __first; + while (++__i 
!= __last) + if (std::invoke(__comp, std::invoke(__proj, *__first), std::invoke(__proj, *__i))) + __first = __i; + return __first; + } + + template _Sp, class _Proj = identity, + indirect_strict_weak_order> _Comp = ranges::less> + _LIBCPP_HIDE_FROM_ABI constexpr _Ip operator()(_Ip __first, _Sp __last, _Comp __comp = {}, _Proj __proj = {}) const { + return __go(__first, __last, __comp, __proj); + } + + template , _Proj>> _Comp = ranges::less> + _LIBCPP_HIDE_FROM_ABI constexpr borrowed_iterator_t<_Rp> operator()(_Rp&& __r, _Comp __comp = {}, + _Proj __proj = {}) const { + return __go(ranges::begin(__r), ranges::end(__r), __comp, __proj); + } +}; +} // namespace __max_element + +inline namespace __cpo { +inline constexpr auto max_element = __max_element::__fn{}; +} // namespace __cpo +} // namespace ranges + +_LIBCPP_END_NAMESPACE_STD + +#endif // !defined(_LIBCPP_HAS_NO_CONCEPTS) && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +#endif // _LIBCPP___ALGORITHM_RANGES_MAX_ELEMENT_H diff --git a/libcxx/include/__memory/unique_ptr.h b/libcxx/include/__memory/unique_ptr.h --- a/libcxx/include/__memory/unique_ptr.h +++ b/libcxx/include/__memory/unique_ptr.h @@ -46,10 +46,8 @@ 0) _NOEXCEPT {} _LIBCPP_INLINE_VISIBILITY void operator()(_Tp* __ptr) const _NOEXCEPT { - static_assert(sizeof(_Tp) > 0, - "default_delete can not delete incomplete type"); - static_assert(!is_void<_Tp>::value, - "default_delete can not delete incomplete type"); + static_assert(sizeof(_Tp) >= 0, "cannot delete an incomplete type"); + static_assert(!is_void<_Tp>::value, "cannot delete an incomplete type"); delete __ptr; } }; @@ -77,10 +75,7 @@ _LIBCPP_INLINE_VISIBILITY typename _EnableIfConvertible<_Up>::type operator()(_Up* __ptr) const _NOEXCEPT { - static_assert(sizeof(_Tp) > 0, - "default_delete can not delete incomplete type"); - static_assert(!is_void<_Tp>::value, - "default_delete can not delete void type"); + static_assert(sizeof(_Up) >= 0, "cannot delete an incomplete type"); delete[] __ptr; } 
}; diff --git a/libcxx/include/__ranges/access.h b/libcxx/include/__ranges/access.h --- a/libcxx/include/__ranges/access.h +++ b/libcxx/include/__ranges/access.h @@ -58,14 +58,14 @@ struct __fn { template [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp (&__t)[]) const noexcept - requires (sizeof(_Tp) != 0) // Disallow incomplete element types. + requires(sizeof(_Tp) >= 0) // Disallow incomplete element types. { return __t + 0; } template [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp (&__t)[_Np]) const noexcept - requires (sizeof(_Tp) != 0) // Disallow incomplete element types. + requires(sizeof(_Tp) >= 0) // Disallow incomplete element types. { return __t + 0; } @@ -132,7 +132,7 @@ public: template [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr auto operator()(_Tp (&__t)[_Np]) const noexcept - requires (sizeof(_Tp) != 0) // Disallow incomplete element types. + requires(sizeof(_Tp) >= 0) // Disallow incomplete element types. { return __t + _Np; } diff --git a/libcxx/include/algorithm b/libcxx/include/algorithm --- a/libcxx/include/algorithm +++ b/libcxx/include/algorithm @@ -763,6 +763,7 @@ #include <__algorithm/pop_heap.h> #include <__algorithm/prev_permutation.h> #include <__algorithm/push_heap.h> +#include <__algorithm/ranges_max_element.h> #include <__algorithm/ranges_min_element.h> #include <__algorithm/ranges_swap_ranges.h> #include <__algorithm/remove.h> diff --git a/libcxx/include/cuchar b/libcxx/include/cuchar new file mode 100644 --- /dev/null +++ b/libcxx/include/cuchar @@ -0,0 +1,60 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP_CUCHAR +#define _LIBCPP_CUCHAR + +/* + cuchar synopsis // since C++11 + +Macros: + + __STDC_UTF_16__ + __STDC_UTF_32__ + +namespace std { + +Types: + + mbstate_t + size_t + +size_t mbrtoc16(char16_t* pc16, const char* s, size_t n, mbstate_t* ps); +size_t c16rtomb(char* s, char16_t c16, mbstate_t* ps); +size_t mbrtoc32(char32_t* pc32, const char* s, size_t n, mbstate_t* ps); +size_t c32rtomb(char* s, char32_t c32, mbstate_t* ps); + +} // std + +*/ + +#include <__config> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if !defined(_LIBCPP_CXX03_LANG) + +using ::mbstate_t _LIBCPP_USING_IF_EXISTS; +using ::size_t _LIBCPP_USING_IF_EXISTS; + +using ::mbrtoc16 _LIBCPP_USING_IF_EXISTS; +using ::c16rtomb _LIBCPP_USING_IF_EXISTS; +using ::mbrtoc32 _LIBCPP_USING_IF_EXISTS; +using ::c32rtomb _LIBCPP_USING_IF_EXISTS; + +#endif // _LIBCPP_CXX03_LANG + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_CUCHAR diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -80,7 +80,10 @@ header "string.h" export * } - // FIXME: is missing. + module uchar_h { + header "uchar.h" + export * + } // provided by C library. module wchar_h { // 's __need_* macros require textual inclusion. @@ -203,7 +206,10 @@ header "ctime" export * } - // FIXME: is missing. 
+ module cuchar { + header "cuchar" + export * + } module cwchar { header "cwchar" export depr.stdio_h @@ -288,6 +294,7 @@ module pop_heap { private header "__algorithm/pop_heap.h" } module prev_permutation { private header "__algorithm/prev_permutation.h" } module push_heap { private header "__algorithm/push_heap.h" } + module ranges_max_element { private header "__algorithm/ranges_max_element.h" } module ranges_min_element { private header "__algorithm/ranges_min_element.h" } module ranges_swap_ranges { private header "__algorithm/ranges_swap_ranges.h" } module remove { private header "__algorithm/remove.h" } diff --git a/libcxx/include/uchar.h b/libcxx/include/uchar.h new file mode 100644 --- /dev/null +++ b/libcxx/include/uchar.h @@ -0,0 +1,52 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP_UCHAR_H +#define _LIBCPP_UCHAR_H + +/* + uchar.h synopsis // since C++11 + +Macros: + + __STDC_UTF_16__ + __STDC_UTF_32__ + +Types: + + mbstate_t + size_t + +size_t mbrtoc16(char16_t* pc16, const char* s, size_t n, mbstate_t* ps); +size_t c16rtomb(char* s, char16_t c16, mbstate_t* ps); +size_t mbrtoc32(char32_t* pc32, const char* s, size_t n, mbstate_t* ps); +size_t c32rtomb(char* s, char32_t c32, mbstate_t* ps); + +*/ + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +#if !defined(_LIBCPP_CXX03_LANG) + +// Some platforms don't implement and we don't want to give a hard +// error on those platforms. When the platform doesn't provide , at +// least include so we get the declaration for size_t. 
+# if __has_include_next() +# include_next +# else +# include +# endif + +#endif // _LIBCPP_CXX03_LANG + +#endif // _LIBCPP_UCHAR_H diff --git a/libcxx/test/libcxx/clang_tidy.sh.cpp b/libcxx/test/libcxx/clang_tidy.sh.cpp --- a/libcxx/test/libcxx/clang_tidy.sh.cpp +++ b/libcxx/test/libcxx/clang_tidy.sh.cpp @@ -74,6 +74,7 @@ #include #include #include +#include #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS # include #endif @@ -188,6 +189,7 @@ #include #include #include +#include #include #include #include diff --git a/libcxx/test/std/depr/depr.c.headers/uchar_h.pass.cpp b/libcxx/test/libcxx/diagnostics/detail.headers/algorithm/ranges_max_element.module.verify.cpp rename from libcxx/test/std/depr/depr.c.headers/uchar_h.pass.cpp rename to libcxx/test/libcxx/diagnostics/detail.headers/algorithm/ranges_max_element.module.verify.cpp --- a/libcxx/test/std/depr/depr.c.headers/uchar_h.pass.cpp +++ b/libcxx/test/libcxx/diagnostics/detail.headers/algorithm/ranges_max_element.module.verify.cpp @@ -5,18 +5,11 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// XFAIL: suse-linux-enterprise-server-11 -// XFAIL: darwin -// XFAIL: netbsd -// XFAIL: LIBCXX-AIX-FIXME - -// -#include +// REQUIRES: modules-build -int main(int, char**) -{ +// WARNING: This test was generated by 'generate_private_header_tests.py' +// and should not be edited manually. 
- return 0; -} +// expected-error@*:* {{use of private header from outside its module: '__algorithm/ranges_max_element.h'}} +#include <__algorithm/ranges_max_element.h> diff --git a/libcxx/test/libcxx/double_include.sh.cpp b/libcxx/test/libcxx/double_include.sh.cpp --- a/libcxx/test/libcxx/double_include.sh.cpp +++ b/libcxx/test/libcxx/double_include.sh.cpp @@ -13,6 +13,9 @@ // RUN: %{cxx} -o %t.exe %t.first.o %t.second.o %{flags} %{link_flags} // RUN: %{run} +// The system-provided seems to be broken on AIX +// XFAIL: LIBCXX-AIX-FIXME + // Prevent from generating deprecated warnings for this test. #if defined(__DEPRECATED) # undef __DEPRECATED @@ -75,6 +78,7 @@ #include #include #include +#include #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS # include #endif @@ -189,6 +193,7 @@ #include #include #include +#include #include #include #include diff --git a/libcxx/test/libcxx/min_max_macros.compile.pass.cpp b/libcxx/test/libcxx/min_max_macros.compile.pass.cpp --- a/libcxx/test/libcxx/min_max_macros.compile.pass.cpp +++ b/libcxx/test/libcxx/min_max_macros.compile.pass.cpp @@ -9,6 +9,9 @@ // Test that headers are not tripped up by the surrounding code defining the // min() and max() macros. +// The system-provided seems to be broken on AIX +// XFAIL: LIBCXX-AIX-FIXME + // Prevent from generating deprecated warnings for this test. #if defined(__DEPRECATED) # undef __DEPRECATED @@ -114,6 +117,8 @@ TEST_MACROS(); #include TEST_MACROS(); +#include +TEST_MACROS(); #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS # include TEST_MACROS(); @@ -296,6 +301,8 @@ TEST_MACROS(); #include TEST_MACROS(); +#include +TEST_MACROS(); #include TEST_MACROS(); #include diff --git a/libcxx/test/libcxx/nasty_macros.compile.pass.cpp b/libcxx/test/libcxx/nasty_macros.compile.pass.cpp --- a/libcxx/test/libcxx/nasty_macros.compile.pass.cpp +++ b/libcxx/test/libcxx/nasty_macros.compile.pass.cpp @@ -9,6 +9,9 @@ // Test that headers are not tripped up by the surrounding code defining various // alphabetic macros. 
+// The system-provided seems to be broken on AIX +// XFAIL: LIBCXX-AIX-FIXME + // Prevent from generating deprecated warnings for this test. #if defined(__DEPRECATED) # undef __DEPRECATED @@ -185,6 +188,7 @@ #include #include #include +#include #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS # include #endif @@ -299,6 +303,7 @@ #include #include #include +#include #include #include #include diff --git a/libcxx/test/libcxx/no_assert_include.compile.pass.cpp b/libcxx/test/libcxx/no_assert_include.compile.pass.cpp --- a/libcxx/test/libcxx/no_assert_include.compile.pass.cpp +++ b/libcxx/test/libcxx/no_assert_include.compile.pass.cpp @@ -9,6 +9,9 @@ // Ensure that none of the standard C++ headers implicitly include cassert or // assert.h (because assert() is implemented as a macro). +// The system-provided seems to be broken on AIX +// XFAIL: LIBCXX-AIX-FIXME + // Prevent from generating deprecated warnings for this test. #if defined(__DEPRECATED) # undef __DEPRECATED @@ -70,6 +73,7 @@ #include #include #include +#include #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS # include #endif @@ -184,6 +188,7 @@ #include #include #include +#include #include #include #include diff --git a/libcxx/test/libcxx/strings/c.strings/version_cuchar.pass.cpp b/libcxx/test/libcxx/strings/c.strings/version_cuchar.pass.cpp --- a/libcxx/test/libcxx/strings/c.strings/version_cuchar.pass.cpp +++ b/libcxx/test/libcxx/strings/c.strings/version_cuchar.pass.cpp @@ -5,13 +5,11 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// XFAIL: * -// Skip this test on windows. If built on top of the MSVC runtime, the -// header actually does exist (although not provided by us). -// This should be removed once D97870 has landed. 
-// UNSUPPORTED: windows +// UNSUPPORTED: c++03 + +// The system-provided seems to be broken on AIX +// XFAIL: LIBCXX-AIX-FIXME // diff --git a/libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.max_element.pass.cpp b/libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.max_element.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/algorithms/alg.sorting/alg.min.max/ranges.max_element.pass.cpp @@ -0,0 +1,196 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-no-concepts +// UNSUPPORTED: libcpp-has-no-incomplete-ranges + +// template S, class Proj = identity, +// indirect_strict_weak_order> Comp = ranges::less> +// constexpr I ranges::max_element(I first, S last, Comp comp = {}, Proj proj = {}); +// +// template, Proj>> Comp = ranges::less> +// constexpr borrowed_iterator_t ranges::max_element(R&& r, Comp comp = {}, Proj proj = {}); + +#include +#include +#include +#include +#include + +#include "test_macros.h" +#include "test_iterators.h" + +template +concept HasMaxElement = requires(T t) { + std::ranges::max_element(t); +}; + +struct NoLessThanOp {}; +struct NotTotallyOrdered { + int i; + bool operator<(const NotTotallyOrdered& o) const { return i < o.i; } +}; + +static_assert(HasMaxElement>); +static_assert(!HasMaxElement); +static_assert(!HasMaxElement); +static_assert(!HasMaxElement); + +template +constexpr void test_iterators(Iter first, Iter last) { + std::same_as auto it = std::ranges::max_element(first, last); + if (first != last) { + for (Iter j = first; j != last; ++j) + assert(!(*j > *it)); + } else { + assert(it == first); + } +} 
+ +template +constexpr void test_range(Range&& rng, Iter begin, Iter end) { + std::same_as auto it = std::ranges::max_element(std::forward(rng)); + if (begin != end) { + for (Iter j = begin; j != end; ++j) + assert(!(*j > *it)); + } else { + assert(it == begin); + } +} + +template +constexpr void test(std::initializer_list a, int expected) { + const int* first = a.begin(); + const int* last = a.end(); + { + std::same_as auto it = std::ranges::max_element(It(first), It(last)); + assert(base(it) == first + expected); + } + { + using Sent = sentinel_wrapper; + std::same_as auto it = std::ranges::max_element(It(first), Sent(It(last))); + assert(base(it) == first + expected); + } + { + auto range = std::ranges::subrange(It(first), It(last)); + std::same_as auto it = std::ranges::max_element(range); + assert(base(it) == first + expected); + } + { + using Sent = sentinel_wrapper; + auto range = std::ranges::subrange(It(first), Sent(It(last))); + std::same_as auto it = std::ranges::max_element(range); + assert(base(it) == first + expected); + } +} + +template +constexpr bool test() { + test({}, 0); + test({1}, 0); + test({1, 2}, 1); + test({2, 1}, 0); + test({2, 1, 2}, 0); + test({2, 1, 1}, 0); + + return true; +} + +constexpr void test_borrowed_range_and_sentinel() { + int a[] = {7, 6, 1, 3, 5, 1, 2, 4}; + + int* ret = std::ranges::max_element(std::views::all(a)); + assert(ret == a + 0); + assert(*ret == 7); +} + +constexpr void test_comparator() { + int a[] = {7, 6, 9, 3, 5, 1, 2, 4}; + int* ret = std::ranges::max_element(a, std::ranges::greater{}); + assert(ret == a + 5); + assert(*ret == 1); +} + +constexpr void test_projection() { + int a[] = {7, 6, 9, 3, 5, 1, 2, 4}; + { + int* ret = std::ranges::max_element(a, std::ranges::less{}, [](int i) { return i == 5 ? 
100 : i; }); + assert(ret == a + 4); + assert(*ret == 5); + } + { + int* ret = std::ranges::max_element(a, std::less{}, [](int& i) { return &i; }); + assert(ret == a + 7); + assert(*ret == 4); + } +} + +struct Immobile { + int i; + + constexpr Immobile(int i_) : i(i_) {} + Immobile(const Immobile&) = delete; + Immobile(Immobile&&) = delete; + + auto operator<=>(const Immobile&) const = default; +}; + +constexpr void test_immobile() { + + Immobile arr[]{1, 2, 3}; + assert(std::ranges::max_element(arr) == arr + 2); + assert(std::ranges::max_element(arr, arr + 3) == arr + 2); +} + +constexpr void test_dangling() { + int compares = 0; + int projections = 0; + auto comparator = [&](int a, int b) { + ++compares; + return a < b; + }; + auto projection = [&](int a) { + ++projections; + return a; + }; + [[maybe_unused]] std::same_as auto ret = + std::ranges::max_element(std::array{1, 2, 3}, comparator, projection); + assert(compares == 2); + assert(projections == 4); +} + +constexpr bool test() { + + test>(); + test>(); + test>(); + test(); + + int a[] = {7, 6, 5, 3, 4, 2, 1, 8}; + test_iterators(a, a + 8); + int a2[] = {7, 6, 5, 3, 4, 2, 1, 8}; + test_range(a2, a2, a2 + 8); + + test_borrowed_range_and_sentinel(); + test_comparator(); + test_projection(); + test_dangling(); + + return true; +} + +int main(int, char**) { + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/depr/depr.c.headers/uchar_h.compile.pass.cpp b/libcxx/test/std/depr/depr.c.headers/uchar_h.compile.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/depr/depr.c.headers/uchar_h.compile.pass.cpp @@ -0,0 +1,30 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// Apple platforms don't provide yet, so these tests fail. +// XFAIL: target={{.+}}-apple-{{.+}} + +// The system-provided seems to be broken on AIX +// XFAIL: LIBCXX-AIX-FIXME + +// + +#include + +#include "test_macros.h" + +// __STDC_UTF_16__ may or may not be defined by the C standard library +// __STDC_UTF_32__ may or may not be defined by the C standard library + +ASSERT_SAME_TYPE(size_t, decltype(mbrtoc16((char16_t*)0, (const char*)0, (size_t)0, (mbstate_t*)0))); +ASSERT_SAME_TYPE(size_t, decltype(c16rtomb((char*)0, (char16_t)0, (mbstate_t*)0))); + +ASSERT_SAME_TYPE(size_t, decltype(mbrtoc32((char32_t*)0, (const char*)0, (size_t)0, (mbstate_t*)0))); +ASSERT_SAME_TYPE(size_t, decltype(c32rtomb((char*)0, (char32_t)0, (mbstate_t*)0))); diff --git a/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp b/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp --- a/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp +++ b/libcxx/test/std/library/description/conventions/customization.point.object/niebloid.compile.pass.cpp @@ -98,7 +98,7 @@ //static_assert(test(std::ranges::lower_bound, a, 42)); //static_assert(test(std::ranges::make_heap, a)); //static_assert(test(std::ranges::max, a)); -//static_assert(test(std::ranges::max_element, a)); +static_assert(test(std::ranges::max_element, a)); //static_assert(test(std::ranges::merge, a, a, a)); //static_assert(test(std::ranges::min, a)); static_assert(test(std::ranges::min_element, a)); diff --git a/libcxx/test/std/ranges/range.access/begin.sizezero.pass.cpp b/libcxx/test/std/ranges/range.access/begin.sizezero.pass.cpp new file mode 100644 --- /dev/null +++ 
b/libcxx/test/std/ranges/range.access/begin.sizezero.pass.cpp @@ -0,0 +1,35 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-no-concepts +// UNSUPPORTED: msvc + +// std::ranges::begin +// std::ranges::cbegin +// Test the fix for https://llvm.org/PR54100 + +#include +#include + +#include "test_macros.h" + +struct A { + int m[0]; +}; +static_assert(sizeof(A) == 0); // an extension supported by GCC and Clang + +int main(int, char**) { + A a[10]; + std::same_as auto p = std::ranges::begin(a); + assert(p == a); + std::same_as auto cp = std::ranges::cbegin(a); + assert(cp == a); + + return 0; +} diff --git a/libcxx/test/std/ranges/range.access/end.sizezero.pass.cpp b/libcxx/test/std/ranges/range.access/end.sizezero.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/ranges/range.access/end.sizezero.pass.cpp @@ -0,0 +1,35 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: libcpp-no-concepts +// UNSUPPORTED: msvc + +// std::ranges::end +// std::ranges::cend +// Test the fix for https://llvm.org/PR54100 + +#include +#include + +#include "test_macros.h" + +struct A { + int m[0]; +}; +static_assert(sizeof(A) == 0); // an extension supported by GCC and Clang + +int main(int, char**) { + A a[10]; + std::same_as auto p = std::ranges::end(a); + assert(p == a + 10); + std::same_as auto cp = std::ranges::cend(a); + assert(cp == a + 10); + + return 0; +} diff --git a/libcxx/test/std/strings/c.strings/cuchar.compile.pass.cpp b/libcxx/test/std/strings/c.strings/cuchar.compile.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/strings/c.strings/cuchar.compile.pass.cpp @@ -0,0 +1,32 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03 + +// Apple platforms don't provide yet, so these tests fail. 
+// XFAIL: target={{.+}}-apple-{{.+}} + +// The system-provided seems to be broken on AIX +// XFAIL: LIBCXX-AIX-FIXME + +// + +#include + +#include "test_macros.h" + +// TODO: Implement mbrtoc8 and c8rtomb, and add tests for those + +// __STDC_UTF_16__ may or may not be defined by the C standard library +// __STDC_UTF_32__ may or may not be defined by the C standard library + +ASSERT_SAME_TYPE(size_t, decltype(std::mbrtoc16((char16_t*)0, (const char*)0, (size_t)0, (mbstate_t*)0))); +ASSERT_SAME_TYPE(size_t, decltype(std::c16rtomb((char*)0, (char16_t)0, (mbstate_t*)0))); + +ASSERT_SAME_TYPE(size_t, decltype(std::mbrtoc32((char32_t*)0, (const char*)0, (size_t)0, (mbstate_t*)0))); +ASSERT_SAME_TYPE(size_t, decltype(std::c32rtomb((char*)0, (char32_t)0, (mbstate_t*)0))); diff --git a/libcxx/test/std/strings/c.strings/cuchar.pass.cpp b/libcxx/test/std/strings/c.strings/cuchar.pass.cpp deleted file mode 100644 --- a/libcxx/test/std/strings/c.strings/cuchar.pass.cpp +++ /dev/null @@ -1,26 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// XFAIL: stdlib=libc++ - -// Skip this test on windows. If built on top of the MSVC runtime, the -// header actually does exist (although not provided by us). -// This should be removed once D97870 has landed. 
-// UNSUPPORTED: windows - -// - -#include - -#include "test_macros.h" - -int main(int, char**) -{ - - return 0; -} diff --git a/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.create/make_unique.sizezero.pass.cpp b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.create/make_unique.sizezero.pass.cpp new file mode 100644 --- /dev/null +++ b/libcxx/test/std/utilities/smartptr/unique.ptr/unique.ptr.create/make_unique.sizezero.pass.cpp @@ -0,0 +1,45 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// This code triggers https://gcc.gnu.org/bugzilla/show_bug.cgi?id=104568 +// UNSUPPORTED: gcc-11 +// UNSUPPORTED: msvc + +// Test the fix for https://llvm.org/PR54100 + +#include +#include + +#include "test_macros.h" + +struct A { + int m[0]; +}; +static_assert(sizeof(A) == 0, ""); // an extension supported by GCC and Clang + +int main(int, char**) { + { + std::unique_ptr p = std::unique_ptr(new A); + assert(p != nullptr); + } + { + std::unique_ptr p = std::unique_ptr(new A[1]); + assert(p != nullptr); + } +#if TEST_STD_VER > 11 + { + std::unique_ptr p = std::make_unique(); + assert(p != nullptr); + } + { + std::unique_ptr p = std::make_unique(1); + assert(p != nullptr); + } +#endif + return 0; +} diff --git a/libcxx/utils/generate_header_inclusion_tests.py b/libcxx/utils/generate_header_inclusion_tests.py --- a/libcxx/utils/generate_header_inclusion_tests.py +++ b/libcxx/utils/generate_header_inclusion_tests.py @@ -67,6 +67,7 @@ "compare": "20", "concepts": "20", "coroutine": "20", + "cuchar": "11", "filesystem": "17", "initializer_list": "11", "optional": "17", @@ -76,6 +77,7 @@ "system_error": "11", "thread": "11", "tuple": 
"11", + "uchar.h": "11", "unordered_map": "11", "unordered_set": "11", "variant": "17", diff --git a/llvm/CODE_OWNERS.TXT b/llvm/CODE_OWNERS.TXT --- a/llvm/CODE_OWNERS.TXT +++ b/llvm/CODE_OWNERS.TXT @@ -19,17 +19,13 @@ I: arsenm D: InferAddressSpaces -N: Simon Atanasyan -E: simon@atanasyan.com -D: MIPS Backend (lib/Target/Mips/*) - N: Justin Bogner E: mail@justinbogner.com D: InstrProfiling and related parts of ProfileData D: SelectionDAG (lib/CodeGen/SelectionDAG/*) N: Alex Bradbury -E: asb@lowrisc.org +E: asb@asbradbury.org D: RISC-V backend (lib/Target/RISCV/*) N: Matthias Braun diff --git a/llvm/CREDITS.TXT b/llvm/CREDITS.TXT --- a/llvm/CREDITS.TXT +++ b/llvm/CREDITS.TXT @@ -52,7 +52,7 @@ D: APFloat implementation. N: Alex Bradbury -E: asb@lowrisc.org +E: asb@asbradbury.org D: RISC-V backend N: Misha Brukman diff --git a/llvm/include/llvm/Analysis/ConstraintSystem.h b/llvm/include/llvm/Analysis/ConstraintSystem.h --- a/llvm/include/llvm/Analysis/ConstraintSystem.h +++ b/llvm/include/llvm/Analysis/ConstraintSystem.h @@ -53,6 +53,11 @@ } bool addVariableRowFill(ArrayRef R) { + // If all variable coefficients are 0, the constraint does not provide any + // usable information. + if (all_of(makeArrayRef(R).drop_front(1), [](int64_t C) { return C == 0; })) + return false; + for (auto &CR : Constraints) { while (CR.size() != R.size()) CR.push_back(0); @@ -75,6 +80,12 @@ bool isConditionImplied(SmallVector R) const; void popLastConstraint() { Constraints.pop_back(); } + void popLastNVariables(unsigned N) { + for (auto &C : Constraints) { + for (unsigned i = 0; i < N; i++) + C.pop_back(); + } + } /// Returns the number of rows in the constraint system. 
unsigned size() const { return Constraints.size(); } diff --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h --- a/llvm/include/llvm/Analysis/ScalarEvolution.h +++ b/llvm/include/llvm/Analysis/ScalarEvolution.h @@ -67,6 +67,8 @@ class Value; enum SCEVTypes : unsigned short; +extern bool VerifySCEV; + /// This class represents an analyzed expression in the program. These are /// opaque objects that the client is not allowed to do much with directly. /// diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h --- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h +++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h @@ -784,7 +784,7 @@ struct OutlineInfo { using PostOutlineCBTy = std::function; PostOutlineCBTy PostOutlineCB; - BasicBlock *EntryBB, *ExitBB; + BasicBlock *EntryBB, *ExitBB, *OuterAllocaBB; SmallVector ExcludeArgsFromAggregate; /// Collect all blocks in between EntryBB and ExitBB in both the given diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -1722,15 +1722,9 @@ [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_ppc_fnmsub - : GCCBuiltin<"__builtin_ppc_fnmsub">, - Intrinsic <[llvm_double_ty], - [llvm_double_ty, llvm_double_ty, llvm_double_ty], - [IntrNoMem]>; - def int_ppc_fnmsubs - : GCCBuiltin<"__builtin_ppc_fnmsubs">, - Intrinsic <[llvm_float_ty], - [llvm_float_ty, llvm_float_ty, llvm_float_ty], - [IntrNoMem]>; + : Intrinsic<[llvm_anyfloat_ty], + [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [IntrNoMem]>; def int_ppc_fre : GCCBuiltin<"__builtin_ppc_fre">, Intrinsic <[llvm_double_ty], [llvm_double_ty], [IntrNoMem]>; diff --git a/llvm/include/llvm/IR/VectorBuilder.h b/llvm/include/llvm/IR/VectorBuilder.h new file mode 100644 --- /dev/null +++ 
b/llvm/include/llvm/IR/VectorBuilder.h @@ -0,0 +1,99 @@ +//===- llvm/VectorBuilder.h - Builder for VP Intrinsics ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the VectorBuilder class, which is used as a convenient way +// to create VP intrinsics as if they were LLVM instructions with a consistent +// and simplified interface. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_VECTORBUILDER_H +#define LLVM_IR_VECTORBUILDER_H + +#include +#include +#include +#include + +namespace llvm { + +class VectorBuilder { +public: + enum class Behavior { + // Abort if the requested VP intrinsic could not be created. + // This is useful for strict consistency. + ReportAndAbort = 0, + + // Return a default-initialized value if the requested VP intrinsic could + // not be created. + // This is useful for a defensive fallback to non-VP code. + SilentlyReturnNone = 1, + }; + +private: + IRBuilder<> &Builder; + Behavior ErrorHandling; + + // Explicit mask parameter. + Value *Mask; + // Explicit vector length parameter. + Value *ExplicitVectorLength; + // Compile-time vector length. + ElementCount StaticVectorLength; + + // Get mask/evl value handles for the current configuration. 
+ Value &requestMask(); + Value &requestEVL(); + + void handleError(const char *ErrorMsg) const; + template + RetType returnWithError(const char *ErrorMsg) const { + handleError(ErrorMsg); + return RetType(); + } + +public: + VectorBuilder(IRBuilder<> &Builder, + Behavior ErrorHandling = Behavior::ReportAndAbort) + : Builder(Builder), ErrorHandling(ErrorHandling), Mask(nullptr), + ExplicitVectorLength(nullptr), + StaticVectorLength(ElementCount::getFixed(0)) {} + + Module &getModule() const; + LLVMContext &getContext() const { return Builder.getContext(); } + + // All-true mask for the currently configured explicit vector length. + Value *getAllTrueMask(); + + VectorBuilder &setMask(Value *NewMask) { + Mask = NewMask; + return *this; + } + VectorBuilder &setEVL(Value *NewExplicitVectorLength) { + ExplicitVectorLength = NewExplicitVectorLength; + return *this; + } + VectorBuilder &setStaticVL(unsigned NewFixedVL) { + StaticVectorLength = ElementCount::getFixed(NewFixedVL); + return *this; + } + // TODO: setStaticVL(ElementCount) for scalable types. + + // Emit a VP intrinsic call that mimics a regular instruction. + // This operation behaves according to the VectorBuilderBehavior. + // \p Opcode The functional instruction opcode of the emitted intrinsic. + // \p ReturnTy The return type of the operation. + // \p VecOpArray The operand list. 
+ Value *createVectorInstruction(unsigned Opcode, Type *ReturnTy, + ArrayRef VecOpArray, + const Twine &Name = Twine()); +}; + +} // namespace llvm + +#endif // LLVM_IR_VECTORBUILDER_H diff --git a/llvm/include/llvm/ObjCopy/MachO/MachOConfig.h b/llvm/include/llvm/ObjCopy/MachO/MachOConfig.h --- a/llvm/include/llvm/ObjCopy/MachO/MachOConfig.h +++ b/llvm/include/llvm/ObjCopy/MachO/MachOConfig.h @@ -29,6 +29,9 @@ // install-name-tool's id option Optional SharedLibId; + // Segments to remove if they are empty + DenseSet EmptySegmentsToRemove; + // Boolean options bool StripSwiftSymbols = false; bool KeepUndefined = false; diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -199,9 +199,24 @@ bool &UsedAssumedInformation, bool Intraprocedural = false); +/// Collect all potential values \p LI could read into \p PotentialValues. That +/// is, the only values read by \p LI are assumed to be known and all are in +/// \p PotentialValues. Dependences onto \p QueryingAA are properly tracked, +/// \p UsedAssumedInformation will inform the caller if assumed information was +/// used. +/// +/// \returns True if the assumed potential copies are all in \p PotentialValues, +/// false if something went wrong and the copies could not be +/// determined. +bool getPotentiallyLoadedValues(Attributor &A, LoadInst &LI, + SmallSetVector &PotentialValues, + const AbstractAttribute &QueryingAA, + bool &UsedAssumedInformation, + bool OnlyExact = false); + /// Collect all potential values of the one stored by \p SI into /// \p PotentialCopies. That is, the only copies that were made via the -/// store are assumed to be known and all in \p PotentialCopies. Dependences +/// store are assumed to be known and all are in \p PotentialCopies. 
Dependences /// onto \p QueryingAA are properly tracked, \p UsedAssumedInformation will /// inform the caller if assumed information was used. /// @@ -210,7 +225,8 @@ /// determined. bool getPotentialCopiesOfStoredValue( Attributor &A, StoreInst &SI, SmallSetVector &PotentialCopies, - const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation); + const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation, + bool OnlyExact = false); /// Return true if \p IRP is readonly. This will query respective AAs that /// deduce the information and introduce dependences for \p QueryingAA. @@ -1865,6 +1881,19 @@ bool checkForAllReturnedValues(function_ref Pred, const AbstractAttribute &QueryingAA); + /// Check \p Pred on all instructions in \p Fn with an opcode present in + /// \p Opcodes. + /// + /// This method will evaluate \p Pred on all instructions with an opcode + /// present in \p Opcode and return true if \p Pred holds on all of them. + bool checkForAllInstructions(function_ref Pred, + const Function *Fn, + const AbstractAttribute &QueryingAA, + const ArrayRef &Opcodes, + bool &UsedAssumedInformation, + bool CheckBBLivenessOnly = false, + bool CheckPotentiallyDead = false); + /// Check \p Pred on all instructions with an opcode present in \p Opcodes. /// /// This method will evaluate \p Pred on all instructions with an opcode @@ -4840,21 +4869,13 @@ virtual bool forallInterferingAccesses( OffsetAndSize OAS, function_ref CB) const = 0; - /// Call \p CB on all accesses that might interfere with \p LI and return true - /// if all such accesses were known and the callback returned true for all of - /// them, false otherwise. 
- virtual bool forallInterferingAccesses( - LoadInst &LI, function_ref CB) const = 0; - virtual bool forallInterferingAccesses( - StoreInst &SI, function_ref CB) const = 0; - - /// Call \p CB on all write accesses that might interfere with \p LI and + /// Call \p CB on all accesses that might interfere with \p I and /// return true if all such accesses were known and the callback returned true /// for all of them, false otherwise. In contrast to forallInterferingAccesses /// this function will perform reasoning to exclude write accesses that cannot /// affect the load even if they on the surface look as if they would. - virtual bool forallInterferingWrites( - Attributor &A, const AbstractAttribute &QueryingAA, LoadInst &LI, + virtual bool forallInterferingAccesses( + Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I, function_ref CB) const = 0; /// This function should return true if the type of the \p AA is AAPointerInfo diff --git a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h --- a/llvm/include/llvm/Transforms/Utils/CodeExtractor.h +++ b/llvm/include/llvm/Transforms/Utils/CodeExtractor.h @@ -92,6 +92,11 @@ BranchProbabilityInfo *BPI; AssumptionCache *AC; + // A block outside of the extraction set where any intermediate + // allocations will be placed inside. If this is null, allocations + // will be placed in the entry block of the function. + BasicBlock *AllocationBlock; + // If true, varargs functions can be extracted. bool AllowVarArgs; @@ -120,11 +125,15 @@ /// code is extracted, including vastart. If AllowAlloca is true, then /// extraction of blocks containing alloca instructions would be possible, /// however code extractor won't validate whether extraction is legal. + /// Any new allocations will be placed in the AllocationBlock, unless + /// it is null, in which case it will be placed in the entry block of + /// the function from which the code is being extracted. 
CodeExtractor(ArrayRef BBs, DominatorTree *DT = nullptr, bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr, BranchProbabilityInfo *BPI = nullptr, - AssumptionCache *AC = nullptr, - bool AllowVarArgs = false, bool AllowAlloca = false, + AssumptionCache *AC = nullptr, bool AllowVarArgs = false, + bool AllowAlloca = false, + BasicBlock *AllocationBlock = nullptr, std::string Suffix = ""); /// Create a code extractor for a loop body. diff --git a/llvm/include/llvm/module.modulemap b/llvm/include/llvm/module.modulemap --- a/llvm/include/llvm/module.modulemap +++ b/llvm/include/llvm/module.modulemap @@ -254,6 +254,7 @@ module IR_InstrTypes { header "IR/InstrTypes.h" export * } module IR_Instructions { header "IR/Instructions.h" export * } module IR_TypeFinder { header "IR/TypeFinder.h" export * } + module IR_VectorBuilder { header "IR/VectorBuilder.h" export * } // Intrinsics.h diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -142,6 +142,12 @@ STATISTIC(NumBruteForceTripCountsComputed, "Number of loops with trip counts computed by force"); +#ifdef EXPENSIVE_CHECKS +bool llvm::VerifySCEV = true; +#else +bool llvm::VerifySCEV = false; +#endif + static cl::opt MaxBruteForceIterations("scalar-evolution-max-iterations", cl::ReallyHidden, cl::ZeroOrMore, @@ -150,9 +156,8 @@ "derived loop"), cl::init(100)); -// FIXME: Enable this with EXPENSIVE_CHECKS when the test suite is clean. -static cl::opt VerifySCEV( - "verify-scev", cl::Hidden, +static cl::opt VerifySCEVOpt( + "verify-scev", cl::Hidden, cl::location(VerifySCEV), cl::desc("Verify ScalarEvolution's backedge taken counts (slow)")); static cl::opt VerifySCEVStrict( "verify-scev-strict", cl::Hidden, @@ -526,12 +531,13 @@ } void SCEVUnknown::allUsesReplacedWith(Value *New) { + // Clear this SCEVUnknown from various maps. 
+ SE->forgetMemoizedResults(this); + // Remove this SCEVUnknown from the uniquing map. SE->UniqueSCEVs.RemoveNode(this); - // Update this SCEVUnknown to point to the new value. This is needed - // because there may still be outstanding SCEVs which still point to - // this SCEVUnknown. + // Replace the value pointer in case someone is still using this SCEVUnknown. setValPtr(New); } @@ -13358,8 +13364,14 @@ if (!ReachableBlocks.contains(L->getHeader())) continue; - auto *CurBECount = SCM.visit( - const_cast(this)->getBackedgeTakenCount(L)); + // Only verify cached BECounts. Computing new BECounts may change the + // results of subsequent SCEV uses. + auto It = BackedgeTakenCounts.find(L); + if (It == BackedgeTakenCounts.end()) + continue; + + auto *CurBECount = + SCM.visit(It->second.getExact(L, const_cast(this))); auto *NewBECount = SE2.getBackedgeTakenCount(L); if (CurBECount == SE2.getCouldNotCompute() || diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -21639,9 +21639,10 @@ SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT); if (SVT != VT.getScalarType()) for (SDValue &Op : Ops) - Op = TLI.isZExtFree(Op.getValueType(), SVT) - ? DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT) - : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT); + Op = Op.isUndef() ? DAG.getUNDEF(SVT) + : (TLI.isZExtFree(Op.getValueType(), SVT) + ? 
DAG.getZExtOrTrunc(Op, SDLoc(SVN), SVT) + : DAG.getSExtOrTrunc(Op, SDLoc(SVN), SVT)); return DAG.getBuildVector(VT, SDLoc(SVN), Ops); } diff --git a/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp b/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp --- a/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp +++ b/llvm/lib/DebugInfo/PDB/Native/PDBStringTableBuilder.cpp @@ -71,7 +71,7 @@ // This list contains all StringCount, BucketCount pairs where BucketCount was // just incremented. It ends before the first BucketCount entry where // BucketCount * 3 would overflow a 32-bit unsigned int. - static std::map StringsToBuckets = { + static const std::pair StringsToBuckets[] = { {0, 1}, {1, 2}, {2, 4}, @@ -124,8 +124,9 @@ {517197275, 1034394550}, {775795913, 1551591826}, {1163693870, 2327387740}}; - auto Entry = StringsToBuckets.lower_bound(NumStrings); - assert(Entry != StringsToBuckets.end()); + const auto *Entry = llvm::lower_bound( + StringsToBuckets, std::make_pair(NumStrings, 0U), llvm::less_first()); + assert(Entry != std::end(StringsToBuckets)); return Entry->second; } diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -300,6 +300,7 @@ /* AssumptionCache */ nullptr, /* AllowVarArgs */ true, /* AllowAlloca */ true, + /* AllocaBlock*/ OI.OuterAllocaBB, /* Suffix */ ".omp_par"); LLVM_DEBUG(dbgs() << "Before outlining: " << *OuterFn << "\n"); @@ -878,6 +879,7 @@ InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator()); FiniCB(PreFiniIP); + OI.OuterAllocaBB = OuterAllocaBlock; OI.EntryBB = PRegEntryBB; OI.ExitBB = PRegExitBB; @@ -901,6 +903,7 @@ /* AssumptionCache */ nullptr, /* AllowVarArgs */ true, /* AllowAlloca */ true, + /* AllocationBlock */ OuterAllocaBlock, /* Suffix */ ".omp_par"); // Find inputs to, outputs from the code region. 
diff --git a/llvm/lib/IR/CMakeLists.txt b/llvm/lib/IR/CMakeLists.txt --- a/llvm/lib/IR/CMakeLists.txt +++ b/llvm/lib/IR/CMakeLists.txt @@ -61,6 +61,7 @@ User.cpp Value.cpp ValueSymbolTable.cpp + VectorBuilder.cpp Verifier.cpp ADDITIONAL_HEADER_DIRS diff --git a/llvm/lib/IR/Value.cpp b/llvm/lib/IR/Value.cpp --- a/llvm/lib/IR/Value.cpp +++ b/llvm/lib/IR/Value.cpp @@ -964,6 +964,9 @@ return Align(CI->getLimitedValue()); } } else if (auto *CstPtr = dyn_cast(this)) { + // Strip pointer casts to avoid creating unnecessary ptrtoint expression + // if the only "reduction" is combining a bitcast + ptrtoint. + CstPtr = CstPtr->stripPointerCasts(); if (auto *CstInt = dyn_cast_or_null(ConstantExpr::getPtrToInt( const_cast(CstPtr), DL.getIntPtrType(getType()), /*OnlyIfReduced=*/true))) { diff --git a/llvm/lib/IR/VectorBuilder.cpp b/llvm/lib/IR/VectorBuilder.cpp new file mode 100644 --- /dev/null +++ b/llvm/lib/IR/VectorBuilder.cpp @@ -0,0 +1,103 @@ +//===- VectorBuilder.cpp - Builder for VP Intrinsics ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the VectorBuilder class, which is used as a convenient +// way to create VP intrinsics as if they were LLVM instructions with a +// consistent and simplified interface. 
+// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include +#include + +namespace llvm { + +void VectorBuilder::handleError(const char *ErrorMsg) const { + if (ErrorHandling == Behavior::SilentlyReturnNone) + return; + report_fatal_error(ErrorMsg); +} + +Module &VectorBuilder::getModule() const { + return *Builder.GetInsertBlock()->getModule(); +} + +Value *VectorBuilder::getAllTrueMask() { + auto *BoolTy = Builder.getInt1Ty(); + auto *MaskTy = VectorType::get(BoolTy, StaticVectorLength); + return ConstantInt::getAllOnesValue(MaskTy); +} + +Value &VectorBuilder::requestMask() { + if (Mask) + return *Mask; + + return *getAllTrueMask(); +} + +Value &VectorBuilder::requestEVL() { + if (ExplicitVectorLength) + return *ExplicitVectorLength; + + assert(!StaticVectorLength.isScalable() && "TODO vscale lowering"); + auto *IntTy = Builder.getInt32Ty(); + return *ConstantInt::get(IntTy, StaticVectorLength.getFixedValue()); +} + +Value *VectorBuilder::createVectorInstruction(unsigned Opcode, Type *ReturnTy, + ArrayRef InstOpArray, + const Twine &Name) { + auto VPID = VPIntrinsic::getForOpcode(Opcode); + if (VPID == Intrinsic::not_intrinsic) + return returnWithError("No VPIntrinsic for this opcode"); + + auto MaskPosOpt = VPIntrinsic::getMaskParamPos(VPID); + auto VLenPosOpt = VPIntrinsic::getVectorLengthParamPos(VPID); + size_t NumInstParams = InstOpArray.size(); + size_t NumVPParams = + NumInstParams + MaskPosOpt.hasValue() + VLenPosOpt.hasValue(); + + SmallVector IntrinParams; + + // Whether the mask and vlen parameter are at the end of the parameter list. + bool TrailingMaskAndVLen = + std::min(MaskPosOpt.getValueOr(NumInstParams), + VLenPosOpt.getValueOr(NumInstParams)) >= NumInstParams; + + if (TrailingMaskAndVLen) { + // Fast path for trailing mask, vector length. 
+ IntrinParams.append(InstOpArray.begin(), InstOpArray.end()); + IntrinParams.resize(NumVPParams); + } else { + IntrinParams.resize(NumVPParams); + // Insert mask and evl operands in between the instruction operands. + for (size_t VPParamIdx = 0, ParamIdx = 0; VPParamIdx < NumVPParams; + ++VPParamIdx) { + if ((MaskPosOpt && MaskPosOpt.getValueOr(NumVPParams) == VPParamIdx) || + (VLenPosOpt && VLenPosOpt.getValueOr(NumVPParams) == VPParamIdx)) + continue; + assert(ParamIdx < NumInstParams); + IntrinParams[VPParamIdx] = InstOpArray[ParamIdx++]; + } + } + + if (MaskPosOpt.hasValue()) + IntrinParams[*MaskPosOpt] = &requestMask(); + if (VLenPosOpt.hasValue()) + IntrinParams[*VLenPosOpt] = &requestEVL(); + + auto *VPDecl = VPIntrinsic::getDeclarationForParams(&getModule(), VPID, + ReturnTy, IntrinParams); + return Builder.CreateCall(VPDecl, IntrinParams, Name); +} + +} // namespace llvm diff --git a/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp b/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp --- a/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp +++ b/llvm/lib/ObjCopy/MachO/MachOObjcopy.cpp @@ -258,6 +258,21 @@ if (!MachOConfig.RPathToPrepend.empty()) Obj.updateLoadCommandIndexes(); + // Remove any empty segments if required. 
+ if (!MachOConfig.EmptySegmentsToRemove.empty()) { + auto RemovePred = [&MachOConfig](const LoadCommand &LC) { + if (LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT_64 || + LC.MachOLoadCommand.load_command_data.cmd == MachO::LC_SEGMENT) { + return LC.Sections.empty() && + MachOConfig.EmptySegmentsToRemove.contains( + LC.getSegmentName().getValue()); + } + return false; + }; + if (Error E = Obj.removeLoadCommands(RemovePred)) + return E; + } + return Error::success(); } diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -532,7 +532,11 @@ FeaturePAuth, FeatureRCPC, //v8.4 FeatureDotProd, FeatureTRACEV8_4, FeatureTLB_RMI, - FeatureFlagM, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO]>; + FeatureFlagM, FeatureDIT, FeatureSEL2, FeatureRCPC_IMMO, + // Not mandatory in v8.0-R, but included here on the grounds that it + // only enables names of system registers + FeatureSpecRestrict + ]>; //===----------------------------------------------------------------------===// // Register File Description @@ -968,7 +972,7 @@ FeatureSVE2BitPerm, FeatureBF16, FeatureMatMulInt8]; list R82 = [HasV8_0rOps, FeaturePerfMon, FeatureFullFP16, FeatureFP16FML, FeatureSSBS, FeaturePredRes, - FeatureSB, FeatureSpecRestrict]; + FeatureSB]; list X1 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, FeatureNEON, FeatureRCPC, FeaturePerfMon, FeatureSPE, FeatureFullFP16, FeatureDotProd]; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9252,6 +9252,56 @@ return true; } +// Detect patterns of a0,a1,a2,a3,b0,b1,b2,b3,c0,c1,c2,c3,d0,d1,d2,d3 from +// v4i32s. This is really a truncate, which we can construct out of (legal) +// concats and truncate nodes. 
+static SDValue ReconstructTruncateFromBuildVector(SDValue V, SelectionDAG &DAG) { + if (V.getValueType() != MVT::v16i8) + return SDValue(); + assert(V.getNumOperands() == 16 && "Expected 16 operands on the BUILDVECTOR"); + + for (unsigned X = 0; X < 4; X++) { + // Check the first item in each group is an extract from lane 0 of a v4i32 + // or v4i16. + SDValue BaseExt = V.getOperand(X * 4); + if (BaseExt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + (BaseExt.getOperand(0).getValueType() != MVT::v4i16 && + BaseExt.getOperand(0).getValueType() != MVT::v4i32) || + !isa(BaseExt.getOperand(1)) || + BaseExt.getConstantOperandVal(1) != 0) + return SDValue(); + SDValue Base = BaseExt.getOperand(0); + // And check the other items are extracts from the same vector. + for (unsigned Y = 1; Y < 4; Y++) { + SDValue Ext = V.getOperand(X * 4 + Y); + if (Ext.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + Ext.getOperand(0) != Base || + !isa(Ext.getOperand(1)) || + Ext.getConstantOperandVal(1) != Y) + return SDValue(); + } + } + + // Turn the buildvector into a series of truncates and concates, which will + // become uzip1's. Any v4i32s we found get truncated to v4i16, which are + // concat together to produce 2 v8i16. These are both truncated and concat + // together. 
+ SDLoc DL(V); + SDValue Trunc[4] = { + V.getOperand(0).getOperand(0), V.getOperand(4).getOperand(0), + V.getOperand(8).getOperand(0), V.getOperand(12).getOperand(0)}; + for (int I = 0; I < 4; I++) + if (Trunc[I].getValueType() == MVT::v4i32) + Trunc[I] = DAG.getNode(ISD::TRUNCATE, DL, MVT::v4i16, Trunc[I]); + SDValue Concat0 = + DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16, Trunc[0], Trunc[1]); + SDValue Concat1 = + DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i16, Trunc[2], Trunc[3]); + SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, Concat0); + SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i8, Concat1); + return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Trunc0, Trunc1); +} + /// Check if a vector shuffle corresponds to a DUP instructions with a larger /// element width than the vector lane type. If that is the case the function /// returns true and writes the value of the DUP instruction lane operand into @@ -10871,6 +10921,12 @@ return SDValue(); } + // Detect patterns of a0,a1,a2,a3,b0,b1,b2,b3,c0,c1,c2,c3,d0,d1,d2,d3 from + // v4i32s. This is really a truncate, which we can construct out of (legal) + // concats and truncate nodes. + if (SDValue M = ReconstructTruncateFromBuildVector(Op, DAG)) + return M; + // Empirical tests suggest this is rarely worth it for vectors of length <= 2. 
if (NumElts >= 4) { if (SDValue shuffle = ReconstructShuffle(Op, DAG)) @@ -12799,12 +12855,15 @@ assert(VT.isScalableVector() && "Can only lower scalable vectors"); unsigned N, Opcode; - static std::map> IntrinsicMap = { - {Intrinsic::aarch64_sve_ld2, {2, AArch64ISD::SVE_LD2_MERGE_ZERO}}, - {Intrinsic::aarch64_sve_ld3, {3, AArch64ISD::SVE_LD3_MERGE_ZERO}}, - {Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}}; - - std::tie(N, Opcode) = IntrinsicMap[Intrinsic]; + static const std::pair> + IntrinsicMap[] = { + {Intrinsic::aarch64_sve_ld2, {2, AArch64ISD::SVE_LD2_MERGE_ZERO}}, + {Intrinsic::aarch64_sve_ld3, {3, AArch64ISD::SVE_LD3_MERGE_ZERO}}, + {Intrinsic::aarch64_sve_ld4, {4, AArch64ISD::SVE_LD4_MERGE_ZERO}}}; + + std::tie(N, Opcode) = llvm::find_if(IntrinsicMap, [&](auto P) { + return P.first == Intrinsic; + })->second; assert(VT.getVectorElementCount().getKnownMinValue() % N == 0 && "invalid tuple vector type!"); diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td --- a/llvm/lib/Target/AMDGPU/AMDGPU.td +++ b/llvm/lib/Target/AMDGPU/AMDGPU.td @@ -737,6 +737,12 @@ "Hardware automatically inserts waitcnt before barrier" >; +def FeatureBackOffBarrier : SubtargetFeature <"back-off-barrier", + "BackOffBarrier", + "true", + "Hardware supports backing off s_barrier if an exception occurs" +>; + def FeatureTrigReducedRange : SubtargetFeature<"trig-reduced-range", "HasTrigReducedRange", "true", @@ -1025,7 +1031,8 @@ FeatureMadMacF32Insts, FeatureSupportsSRAMECC, FeaturePackedTID, - FullRate64Ops]>; + FullRate64Ops, + FeatureBackOffBarrier]>; def FeatureISAVersion9_0_C : FeatureSet< [FeatureGFX9, @@ -1059,7 +1066,8 @@ FeatureSupportsSRAMECC, FeaturePackedTID, FeatureArchitectedFlatScratch, - FullRate64Ops]>; + FullRate64Ops, + FeatureBackOffBarrier]>; // TODO: Organize more features into groups. 
def FeatureGroup { @@ -1094,7 +1102,8 @@ FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureLdsMisalignedBug, - FeatureSupportsXNACK])>; + FeatureSupportsXNACK, + FeatureBackOffBarrier])>; def FeatureISAVersion10_1_1 : FeatureSet< !listconcat(FeatureGroup.GFX10_1_Bugs, @@ -1116,7 +1125,8 @@ FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureLdsMisalignedBug, - FeatureSupportsXNACK])>; + FeatureSupportsXNACK, + FeatureBackOffBarrier])>; def FeatureISAVersion10_1_2 : FeatureSet< !listconcat(FeatureGroup.GFX10_1_Bugs, @@ -1138,7 +1148,8 @@ FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureLdsMisalignedBug, - FeatureSupportsXNACK])>; + FeatureSupportsXNACK, + FeatureBackOffBarrier])>; def FeatureISAVersion10_1_3 : FeatureSet< !listconcat(FeatureGroup.GFX10_1_Bugs, @@ -1156,7 +1167,8 @@ FeatureMadMacF32Insts, FeatureDsSrc2Insts, FeatureLdsMisalignedBug, - FeatureSupportsXNACK])>; + FeatureSupportsXNACK, + FeatureBackOffBarrier])>; def FeatureISAVersion10_3_0 : FeatureSet< [FeatureGFX10, @@ -1173,7 +1185,8 @@ FeatureNSAEncoding, FeatureNSAMaxSize13, FeatureWavefrontSize32, - FeatureShaderCyclesRegister]>; + FeatureShaderCyclesRegister, + FeatureBackOffBarrier]>; //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h --- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h +++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h @@ -72,6 +72,7 @@ // Dynamically set bits that enable features. bool FlatForGlobal; bool AutoWaitcntBeforeBarrier; + bool BackOffBarrier; bool UnalignedScratchAccess; bool UnalignedAccessMode; bool HasApertureRegs; @@ -493,6 +494,12 @@ return AutoWaitcntBeforeBarrier; } + /// \returns true if the target supports backing off of s_barrier instructions + /// when an exception is raised. 
+ bool supportsBackOffBarrier() const { + return BackOffBarrier; + } + bool hasUnalignedBufferAccess() const { return UnalignedBufferAccess; } diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -1135,12 +1135,12 @@ } } - // Check to see if this is an S_BARRIER, and if an implicit S_WAITCNT 0 - // occurs before the instruction. Doing it here prevents any additional - // S_WAITCNTs from being emitted if the instruction was marked as - // requiring a WAITCNT beforehand. + // The subtarget may have an implicit S_WAITCNT 0 before barriers. If it does + // not, we need to ensure the subtarget is capable of backing off barrier + // instructions in case there are any outstanding memory operations that may + // cause an exception. Otherwise, insert an explicit S_WAITCNT 0 here. if (MI.getOpcode() == AMDGPU::S_BARRIER && - !ST->hasAutoWaitcntBeforeBarrier()) { + !ST->hasAutoWaitcntBeforeBarrier() && !ST->supportsBackOffBarrier()) { Wait = Wait.combined(AMDGPU::Waitcnt::allZero(ST->hasVscnt())); } diff --git a/llvm/lib/Target/Hexagon/HexagonArch.h b/llvm/lib/Target/Hexagon/HexagonArch.h deleted file mode 100644 --- a/llvm/lib/Target/Hexagon/HexagonArch.h +++ /dev/null @@ -1,31 +0,0 @@ -//===- HexagonArch.h ------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONARCH_H -#define LLVM_LIB_TARGET_HEXAGON_HEXAGONARCH_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/Optional.h" -#include "llvm/ADT/StringRef.h" -#include "HexagonDepArch.h" -#include - -namespace llvm { -namespace Hexagon { - -template -llvm::Optional GetCpu(ArchCont const &ArchList, Val CPUString) { - llvm::Optional Res; - auto Entry = ArchList.find(CPUString); - if (Entry != ArchList.end()) - Res = Entry->second; - return Res; -} -} // namespace Hexagon -} // namespace llvm -#endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONARCH_H diff --git a/llvm/lib/Target/Hexagon/HexagonDepArch.h b/llvm/lib/Target/Hexagon/HexagonDepArch.h --- a/llvm/lib/Target/Hexagon/HexagonDepArch.h +++ b/llvm/lib/Target/Hexagon/HexagonDepArch.h @@ -12,82 +12,28 @@ #ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPARCH_H #define LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPARCH_H -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/BinaryFormat/ELF.h" - -#include -#include +#include "llvm/ADT/StringSwitch.h" namespace llvm { namespace Hexagon { enum class ArchEnum { NoArch, Generic, V5, V55, V60, V62, V65, V66, V67, V68, V69 }; -static constexpr unsigned ArchValsNumArray[] = {5, 55, 60, 62, 65, 66, 67, 68, 69}; -static constexpr ArrayRef ArchValsNum(ArchValsNumArray); - -static constexpr StringLiteral ArchValsTextArray[] = { "v5", "v55", "v60", "v62", "v65", "v66", "v67", "v68", "v69" }; -static constexpr ArrayRef ArchValsText(ArchValsTextArray); - -static constexpr StringLiteral CpuValsTextArray[] = { "hexagonv5", "hexagonv55", "hexagonv60", "hexagonv62", "hexagonv65", "hexagonv66", "hexagonv67", "hexagonv67t", "hexagonv68", "hexagonv69" }; -static constexpr ArrayRef CpuValsText(CpuValsTextArray); - -static constexpr StringLiteral CpuNickTextArray[] = { "v5", "v55", "v60", "v62", "v65", "v66", 
"v67", "v67t", "v68", "v69" }; -static constexpr ArrayRef CpuNickText(CpuNickTextArray); - -static const std::map CpuTable{ - {"generic", Hexagon::ArchEnum::V5}, - {"hexagonv5", Hexagon::ArchEnum::V5}, - {"hexagonv55", Hexagon::ArchEnum::V55}, - {"hexagonv60", Hexagon::ArchEnum::V60}, - {"hexagonv62", Hexagon::ArchEnum::V62}, - {"hexagonv65", Hexagon::ArchEnum::V65}, - {"hexagonv66", Hexagon::ArchEnum::V66}, - {"hexagonv67", Hexagon::ArchEnum::V67}, - {"hexagonv67t", Hexagon::ArchEnum::V67}, - {"hexagonv68", Hexagon::ArchEnum::V68}, - {"hexagonv69", Hexagon::ArchEnum::V69}, -}; - -static const std::map ElfFlagsByCpuStr = { - {"generic", llvm::ELF::EF_HEXAGON_MACH_V5}, - {"hexagonv5", llvm::ELF::EF_HEXAGON_MACH_V5}, - {"hexagonv55", llvm::ELF::EF_HEXAGON_MACH_V55}, - {"hexagonv60", llvm::ELF::EF_HEXAGON_MACH_V60}, - {"hexagonv62", llvm::ELF::EF_HEXAGON_MACH_V62}, - {"hexagonv65", llvm::ELF::EF_HEXAGON_MACH_V65}, - {"hexagonv66", llvm::ELF::EF_HEXAGON_MACH_V66}, - {"hexagonv67", llvm::ELF::EF_HEXAGON_MACH_V67}, - {"hexagonv67t", llvm::ELF::EF_HEXAGON_MACH_V67T}, - {"hexagonv68", llvm::ELF::EF_HEXAGON_MACH_V68}, - {"hexagonv69", llvm::ELF::EF_HEXAGON_MACH_V69}, -}; -static const std::map ElfArchByMachFlags = { - {llvm::ELF::EF_HEXAGON_MACH_V5, "V5"}, - {llvm::ELF::EF_HEXAGON_MACH_V55, "V55"}, - {llvm::ELF::EF_HEXAGON_MACH_V60, "V60"}, - {llvm::ELF::EF_HEXAGON_MACH_V62, "V62"}, - {llvm::ELF::EF_HEXAGON_MACH_V65, "V65"}, - {llvm::ELF::EF_HEXAGON_MACH_V66, "V66"}, - {llvm::ELF::EF_HEXAGON_MACH_V67, "V67"}, - {llvm::ELF::EF_HEXAGON_MACH_V67T, "V67T"}, - {llvm::ELF::EF_HEXAGON_MACH_V68, "V68"}, - {llvm::ELF::EF_HEXAGON_MACH_V69, "V69"}, -}; -static const std::map ElfCpuByMachFlags = { - {llvm::ELF::EF_HEXAGON_MACH_V5, "hexagonv5"}, - {llvm::ELF::EF_HEXAGON_MACH_V55, "hexagonv55"}, - {llvm::ELF::EF_HEXAGON_MACH_V60, "hexagonv60"}, - {llvm::ELF::EF_HEXAGON_MACH_V62, "hexagonv62"}, - {llvm::ELF::EF_HEXAGON_MACH_V65, "hexagonv65"}, - {llvm::ELF::EF_HEXAGON_MACH_V66, 
"hexagonv66"}, - {llvm::ELF::EF_HEXAGON_MACH_V67, "hexagonv67"}, - {llvm::ELF::EF_HEXAGON_MACH_V67T, "hexagonv67t"}, - {llvm::ELF::EF_HEXAGON_MACH_V68, "hexagonv68"}, - {llvm::ELF::EF_HEXAGON_MACH_V69, "hexagonv69"}, -}; - +inline Optional getCpu(StringRef CPU) { + return StringSwitch>(CPU) + .Case("generic", Hexagon::ArchEnum::V5) + .Case("hexagonv5", Hexagon::ArchEnum::V5) + .Case("hexagonv55", Hexagon::ArchEnum::V55) + .Case("hexagonv60", Hexagon::ArchEnum::V60) + .Case("hexagonv62", Hexagon::ArchEnum::V62) + .Case("hexagonv65", Hexagon::ArchEnum::V65) + .Case("hexagonv66", Hexagon::ArchEnum::V66) + .Case("hexagonv67", Hexagon::ArchEnum::V67) + .Case("hexagonv67t", Hexagon::ArchEnum::V67) + .Case("hexagonv68", Hexagon::ArchEnum::V68) + .Case("hexagonv69", Hexagon::ArchEnum::V69) + .Default(None); +} } // namespace Hexagon -} // namespace llvm; +} // namespace llvm #endif // LLVM_LIB_TARGET_HEXAGON_HEXAGONDEPARCH_H diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -13,7 +13,7 @@ #ifndef LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H #define LLVM_LIB_TARGET_HEXAGON_HEXAGONSUBTARGET_H -#include "HexagonArch.h" +#include "HexagonDepArch.h" #include "HexagonFrameLowering.h" #include "HexagonISelLowering.h" #include "HexagonInstrInfo.h" diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -95,8 +95,7 @@ HexagonSubtarget & HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { - Optional ArchVer = - Hexagon::GetCpu(Hexagon::CpuTable, CPUString); + Optional ArchVer = Hexagon::getCpu(CPUString); if (ArchVer) HexagonArchVersion = *ArchVer; else diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp 
b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp @@ -11,7 +11,7 @@ //===----------------------------------------------------------------------===// #include "MCTargetDesc/HexagonMCTargetDesc.h" -#include "HexagonArch.h" +#include "HexagonDepArch.h" #include "HexagonTargetStreamer.h" #include "MCTargetDesc/HexagonInstPrinter.h" #include "MCTargetDesc/HexagonMCAsmInfo.h" @@ -410,8 +410,8 @@ } } -static bool isCPUValid(const std::string &CPU) { - return Hexagon::CpuTable.find(CPU) != Hexagon::CpuTable.cend(); +static bool isCPUValid(StringRef CPU) { + return Hexagon::getCpu(CPU).hasValue(); } namespace { @@ -560,12 +560,18 @@ } unsigned Hexagon_MC::GetELFFlags(const MCSubtargetInfo &STI) { - using llvm::Hexagon::ElfFlagsByCpuStr; - - const std::string CPU(STI.getCPU().str()); - auto F = ElfFlagsByCpuStr.find(CPU); - assert(F != ElfFlagsByCpuStr.end() && "Unrecognized Architecture"); - return F->second; + return StringSwitch(STI.getCPU()) + .Case("generic", llvm::ELF::EF_HEXAGON_MACH_V5) + .Case("hexagonv5", llvm::ELF::EF_HEXAGON_MACH_V5) + .Case("hexagonv55", llvm::ELF::EF_HEXAGON_MACH_V55) + .Case("hexagonv60", llvm::ELF::EF_HEXAGON_MACH_V60) + .Case("hexagonv62", llvm::ELF::EF_HEXAGON_MACH_V62) + .Case("hexagonv65", llvm::ELF::EF_HEXAGON_MACH_V65) + .Case("hexagonv66", llvm::ELF::EF_HEXAGON_MACH_V66) + .Case("hexagonv67", llvm::ELF::EF_HEXAGON_MACH_V67) + .Case("hexagonv67t", llvm::ELF::EF_HEXAGON_MACH_V67T) + .Case("hexagonv68", llvm::ELF::EF_HEXAGON_MACH_V68) + .Case("hexagonv69", llvm::ELF::EF_HEXAGON_MACH_V69); } llvm::ArrayRef Hexagon_MC::GetVectRegRev() { diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td --- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td +++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td @@ -33,7 +33,9 @@ } def uimm2 : Operand; -def 
uimm2_plus1 : Operand; +def uimm2_plus1 : Operand { + let EncoderMethod = "getImmOpValueSub1"; +} def uimm3 : Operand; def uimm5 : Operand; def uimm6 : Operand; @@ -41,12 +43,20 @@ def uimm15 : Operand; def simm12 : Operand, ImmLeaf(Imm);}]>; def simm14 : Operand; -def simm14_lsl2 : Operand; +def simm14_lsl2 : Operand { + let EncoderMethod = "getImmOpValueAsr2"; +} def simm16 : Operand; -def simm16_lsl2 : Operand; +def simm16_lsl2 : Operand { + let EncoderMethod = "getImmOpValueAsr2"; +} def simm20 : Operand; -def simm21_lsl2 : Operand; -def simm26_lsl2 : Operand; +def simm21_lsl2 : Operand { + let EncoderMethod = "getImmOpValueAsr2"; +} +def simm26_lsl2 : Operand { + let EncoderMethod = "getImmOpValueAsr2"; +} //===----------------------------------------------------------------------===// // Instruction Formats diff --git a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp --- a/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp +++ b/llvm/lib/Target/LoongArch/MCTargetDesc/LoongArchMCCodeEmitter.cpp @@ -51,6 +51,23 @@ unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; + + /// Return binary encoding of an immediate operand specified by OpNo. + /// The value returned is the value of the immediate minus 1. + /// Note that this function is dedicated to specific immediate types, + /// e.g. uimm2_plus1. + unsigned getImmOpValueSub1(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; + + /// Return binary encoding of an immediate operand specified by OpNo. + /// The value returned is the value of the immediate shifted right + // arithmetically by 2. + /// Note that this function is dedicated to specific immediate types, + /// e.g. simm14_lsl2, simm16_lsl2, simm21_lsl2 and simm26_lsl2. 
+ unsigned getImmOpValueAsr2(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; }; } // end anonymous namespace @@ -68,6 +85,22 @@ llvm_unreachable("Unhandled expression!"); } +unsigned +LoongArchMCCodeEmitter::getImmOpValueSub1(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + return MI.getOperand(OpNo).getImm() - 1; +} + +unsigned +LoongArchMCCodeEmitter::getImmOpValueAsr2(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { + unsigned Res = MI.getOperand(OpNo).getImm(); + assert((Res & 3) == 0 && "lowest 2 bits are non-zero"); + return Res >> 2; +} + void LoongArchMCCodeEmitter::encodeInstruction( const MCInst &MI, raw_ostream &OS, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -627,6 +627,8 @@ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::f64, Custom); setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::ppcf128, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v2f64, Custom); // To handle counter-based loop conditions. 
setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i1, Custom); @@ -10549,6 +10551,16 @@ DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}), 0); } + case Intrinsic::ppc_fnmsub: { + EVT VT = Op.getOperand(1).getValueType(); + if (!Subtarget.hasVSX() || (!Subtarget.hasFloat128() && VT == MVT::f128)) + return DAG.getNode( + ISD::FNEG, dl, VT, + DAG.getNode(ISD::FMA, dl, VT, Op.getOperand(1), Op.getOperand(2), + DAG.getNode(ISD::FNEG, dl, VT, Op.getOperand(3)))); + return DAG.getNode(PPCISD::FNMSUB, dl, VT, Op.getOperand(1), + Op.getOperand(2), Op.getOperand(3)); + } case Intrinsic::ppc_convert_f128_to_ppcf128: case Intrinsic::ppc_convert_ppcf128_to_f128: { RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128 @@ -11220,6 +11232,7 @@ Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128, N->getOperand(2), N->getOperand(1))); break; + case Intrinsic::ppc_fnmsub: case Intrinsic::ppc_convert_f128_to_ppcf128: Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG)); break; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -3728,8 +3728,6 @@ // XL Compat intrinsics. def : Pat<(int_ppc_fmsub f64:$A, f64:$B, f64:$C), (FMSUB $A, $B, $C)>; def : Pat<(int_ppc_fmsubs f32:$A, f32:$B, f32:$C), (FMSUBS $A, $B, $C)>; -def : Pat<(int_ppc_fnmsub f64:$A, f64:$B, f64:$C), (FNMSUB $A, $B, $C)>; -def : Pat<(int_ppc_fnmsubs f32:$A, f32:$B, f32:$C), (FNMSUBS $A, $B, $C)>; def : Pat<(int_ppc_fnmadd f64:$A, f64:$B, f64:$C), (FNMADD $A, $B, $C)>; def : Pat<(int_ppc_fnmadds f32:$A, f32:$B, f32:$C), (FNMADDS $A, $B, $C)>; def : Pat<(int_ppc_fre f64:$A), (FRE $A)>; diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -2897,7 +2897,6 @@ // XL Compat builtins. 
def : Pat<(int_ppc_fmsub f64:$A, f64:$B, f64:$C), (XSMSUBMDP $A, $B, $C)>; -def : Pat<(int_ppc_fnmsub f64:$A, f64:$B, f64:$C), (XSNMSUBMDP $A, $B, $C)>; def : Pat<(int_ppc_fnmadd f64:$A, f64:$B, f64:$C), (XSNMADDMDP $A, $B, $C)>; def : Pat<(int_ppc_fre f64:$A), (XSREDP $A)>; def : Pat<(int_ppc_frsqrte vsfrc:$XB), (XSRSQRTEDP $XB)>; @@ -3311,7 +3310,6 @@ // XL Compat builtins. def : Pat<(int_ppc_fmsubs f32:$A, f32:$B, f32:$C), (XSMSUBMSP $A, $B, $C)>; -def : Pat<(int_ppc_fnmsubs f32:$A, f32:$B, f32:$C), (XSNMSUBMSP $A, $B, $C)>; def : Pat<(int_ppc_fnmadds f32:$A, f32:$B, f32:$C), (XSNMADDMSP $A, $B, $C)>; def : Pat<(int_ppc_fres f32:$A), (XSRESP $A)>; def : Pat<(i32 (int_ppc_extract_exp f64:$A)), diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -1027,45 +1027,44 @@ } MVT Src1VT = Src1.getSimpleValueType(); unsigned VMSLTOpcode, VMSLTMaskOpcode, VMXOROpcode, VMANDNOpcode, - VMSetOpcode, VMANDOpcode; + VMOROpcode; switch (RISCVTargetLowering::getLMUL(Src1VT)) { default: llvm_unreachable("Unexpected LMUL!"); -#define CASE_VMSLT_VMSET_OPCODES(lmulenum, suffix, suffix_b) \ +#define CASE_VMSLT_OPCODES(lmulenum, suffix, suffix_b) \ case RISCVII::VLMUL::lmulenum: \ VMSLTOpcode = IsUnsigned ? RISCV::PseudoVMSLTU_VX_##suffix \ : RISCV::PseudoVMSLT_VX_##suffix; \ VMSLTMaskOpcode = IsUnsigned ? 
RISCV::PseudoVMSLTU_VX_##suffix##_MASK \ : RISCV::PseudoVMSLT_VX_##suffix##_MASK; \ - VMSetOpcode = RISCV::PseudoVMSET_M_##suffix_b; \ break; - CASE_VMSLT_VMSET_OPCODES(LMUL_F8, MF8, B1) - CASE_VMSLT_VMSET_OPCODES(LMUL_F4, MF4, B2) - CASE_VMSLT_VMSET_OPCODES(LMUL_F2, MF2, B4) - CASE_VMSLT_VMSET_OPCODES(LMUL_1, M1, B8) - CASE_VMSLT_VMSET_OPCODES(LMUL_2, M2, B16) - CASE_VMSLT_VMSET_OPCODES(LMUL_4, M4, B32) - CASE_VMSLT_VMSET_OPCODES(LMUL_8, M8, B64) -#undef CASE_VMSLT_VMSET_OPCODES + CASE_VMSLT_OPCODES(LMUL_F8, MF8, B1) + CASE_VMSLT_OPCODES(LMUL_F4, MF4, B2) + CASE_VMSLT_OPCODES(LMUL_F2, MF2, B4) + CASE_VMSLT_OPCODES(LMUL_1, M1, B8) + CASE_VMSLT_OPCODES(LMUL_2, M2, B16) + CASE_VMSLT_OPCODES(LMUL_4, M4, B32) + CASE_VMSLT_OPCODES(LMUL_8, M8, B64) +#undef CASE_VMSLT_OPCODES } // Mask operations use the LMUL from the mask type. switch (RISCVTargetLowering::getLMUL(VT)) { default: llvm_unreachable("Unexpected LMUL!"); -#define CASE_VMXOR_VMANDN_VMAND_OPCODES(lmulenum, suffix) \ +#define CASE_VMXOR_VMANDN_VMOR_OPCODES(lmulenum, suffix) \ case RISCVII::VLMUL::lmulenum: \ VMXOROpcode = RISCV::PseudoVMXOR_MM_##suffix; \ VMANDNOpcode = RISCV::PseudoVMANDN_MM_##suffix; \ - VMANDOpcode = RISCV::PseudoVMAND_MM_##suffix; \ + VMOROpcode = RISCV::PseudoVMOR_MM_##suffix; \ break; - CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F8, MF8) - CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F4, MF4) - CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_F2, MF2) - CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_1, M1) - CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_2, M2) - CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_4, M4) - CASE_VMXOR_VMANDN_VMAND_OPCODES(LMUL_8, M8) -#undef CASE_VMXOR_VMANDN_VMAND_OPCODES + CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F8, MF8) + CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F4, MF4) + CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_F2, MF2) + CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_1, M1) + CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_2, M2) + CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_4, M4) + CASE_VMXOR_VMANDN_VMOR_OPCODES(LMUL_8, M8) +#undef 
CASE_VMXOR_VMANDN_VMOR_OPCODES } SDValue SEW = CurDAG->getTargetConstant( Log2_32(Src1VT.getScalarSizeInBits()), DL, XLenVT); @@ -1075,12 +1074,17 @@ SDValue MaskedOff = Node->getOperand(1); SDValue Mask = Node->getOperand(4); - // If vmsgeu_mask with 0 immediate, expand it to {vmset, vmand}. + // If vmsgeu_mask with 0 immediate, expand it to vmor mask, maskedoff. if (IsCmpUnsignedZero) { - SDValue VMSet = - SDValue(CurDAG->getMachineNode(VMSetOpcode, DL, VT, VL, SEW), 0); - ReplaceNode(Node, CurDAG->getMachineNode(VMANDOpcode, DL, VT, - {Mask, VMSet, VL, MaskSEW})); + // We don't need vmor if the MaskedOff and the Mask are the same + // value. + if (Mask == MaskedOff) { + ReplaceUses(Node, Mask.getNode()); + return; + } + ReplaceNode(Node, + CurDAG->getMachineNode(VMOROpcode, DL, VT, + {Mask, MaskedOff, VL, MaskSEW})); return; } diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp --- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.cpp @@ -126,7 +126,7 @@ // Frame indexes representing locations of CSRs which are given a fixed location // by save/restore libcalls. 
-static const std::map FixedCSRFIMap = { +static const std::pair FixedCSRFIMap[] = { {/*ra*/ RISCV::X1, -1}, {/*s0*/ RISCV::X8, -2}, {/*s1*/ RISCV::X9, -3}, @@ -149,8 +149,9 @@ if (!RVFI->useSaveRestoreLibCalls(MF)) return false; - auto FII = FixedCSRFIMap.find(Reg); - if (FII == FixedCSRFIMap.end()) + const auto *FII = + llvm::find_if(FixedCSRFIMap, [&](auto P) { return P.first == Reg; }); + if (FII == std::end(FixedCSRFIMap)) return false; FrameIdx = FII->second; diff --git a/llvm/lib/Target/VE/VECustomDAG.h b/llvm/lib/Target/VE/VECustomDAG.h --- a/llvm/lib/Target/VE/VECustomDAG.h +++ b/llvm/lib/Target/VE/VECustomDAG.h @@ -188,6 +188,11 @@ SDValue annotateLegalAVL(SDValue AVL) const; VETargetMasks getTargetSplitMask(SDValue RawMask, SDValue RawAVL, PackElem Part) const; + + // Splitting support + SDValue getSplitPtrOffset(SDValue Ptr, SDValue ByteStride, + PackElem Part) const; + SDValue getSplitPtrStride(SDValue PackStride) const; }; } // namespace llvm diff --git a/llvm/lib/Target/VE/VECustomDAG.cpp b/llvm/lib/Target/VE/VECustomDAG.cpp --- a/llvm/lib/Target/VE/VECustomDAG.cpp +++ b/llvm/lib/Target/VE/VECustomDAG.cpp @@ -155,6 +155,10 @@ return 1; case VEISD::VVP_SELECT: return 3; + case VEISD::VVP_LOAD: + return 4; + case VEISD::VVP_STORE: + return 5; } return None; @@ -431,4 +435,19 @@ return VETargetMasks(NewMask, NewAVL); } +SDValue VECustomDAG::getSplitPtrOffset(SDValue Ptr, SDValue ByteStride, + PackElem Part) const { + // High starts at base ptr but has more significant bits in the 64bit vector + // element. 
+ if (Part == PackElem::Hi) + return Ptr; + return getNode(ISD::ADD, MVT::i64, {Ptr, ByteStride}); +} + +SDValue VECustomDAG::getSplitPtrStride(SDValue PackStride) const { + if (auto ConstBytes = dyn_cast(PackStride)) + return getConstant(2 * ConstBytes->getSExtValue(), MVT::i64); + return getNode(ISD::SHL, MVT::i64, {PackStride, getConstant(1, MVT::i32)}); +} + } // namespace llvm diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h --- a/llvm/lib/Target/VE/VEISelLowering.h +++ b/llvm/lib/Target/VE/VEISelLowering.h @@ -189,7 +189,9 @@ SDValue lowerVVP_LOAD_STORE(SDValue Op, VECustomDAG&) const; SDValue legalizeInternalVectorOp(SDValue Op, SelectionDAG &DAG) const; + SDValue legalizeInternalLoadStoreOp(SDValue Op, VECustomDAG &CDAG) const; SDValue splitVectorOp(SDValue Op, VECustomDAG &CDAG) const; + SDValue splitPackedLoadStore(SDValue Op, VECustomDAG &CDAG) const; SDValue legalizePackedAVL(SDValue Op, VECustomDAG &CDAG) const; SDValue splitMaskArithmetic(SDValue Op, SelectionDAG &DAG) const; /// } VVPLowering diff --git a/llvm/lib/Target/VE/VEInstrInfo.cpp b/llvm/lib/Target/VE/VEInstrInfo.cpp --- a/llvm/lib/Target/VE/VEInstrInfo.cpp +++ b/llvm/lib/Target/VE/VEInstrInfo.cpp @@ -811,7 +811,7 @@ // replace to pvfmk.w.up and pvfmk.w.lo // replace to pvfmk.s.up and pvfmk.s.lo - static std::map> VFMKMap = { + static const std::pair> VFMKMap[] = { {VE::VFMKyal, {VE::VFMKLal, VE::VFMKLal}}, {VE::VFMKynal, {VE::VFMKLnal, VE::VFMKLnal}}, {VE::VFMKWyvl, {VE::PVFMKWUPvl, VE::PVFMKWLOvl}}, @@ -822,8 +822,9 @@ unsigned Opcode = MI.getOpcode(); - auto Found = VFMKMap.find(Opcode); - if (Found == VFMKMap.end()) + const auto *Found = + llvm::find_if(VFMKMap, [&](auto P) { return P.first == Opcode; }); + if (Found == std::end(VFMKMap)) report_fatal_error("unexpected opcode for pseudo vfmk"); unsigned OpcodeUpper = (*Found).second.first; diff --git a/llvm/lib/Target/VE/VVPISelLowering.cpp b/llvm/lib/Target/VE/VVPISelLowering.cpp --- 
a/llvm/lib/Target/VE/VVPISelLowering.cpp +++ b/llvm/lib/Target/VE/VVPISelLowering.cpp @@ -114,8 +114,6 @@ auto DataVT = *getIdiomaticVectorType(Op.getNode()); auto Packing = getTypePacking(DataVT); - assert(Packing == Packing::Normal && "TODO Packed load store isel"); - // TODO: Infer lower AVL from mask. if (!AVL) AVL = CDAG.getConstant(DataVT.getVectorNumElements(), MVT::i32); @@ -150,10 +148,117 @@ {Chain, Data, BasePtr, StrideV, Mask, AVL}); } +SDValue VETargetLowering::splitPackedLoadStore(SDValue Op, + VECustomDAG &CDAG) const { + auto VVPOC = *getVVPOpcode(Op.getOpcode()); + assert((VVPOC == VEISD::VVP_LOAD) || (VVPOC == VEISD::VVP_STORE)); + + MVT DataVT = getIdiomaticVectorType(Op.getNode())->getSimpleVT(); + assert(getTypePacking(DataVT) == Packing::Dense && + "Can only split packed load/store"); + MVT SplitDataVT = splitVectorType(DataVT); + + SDValue PassThru = getNodePassthru(Op); + assert(!PassThru && "Should have been folded in lowering to VVP layer"); + + // Analyze the operation + SDValue PackedMask = getNodeMask(Op); + SDValue PackedAVL = getAnnotatedNodeAVL(Op).first; + SDValue PackPtr = getMemoryPtr(Op); + SDValue PackData = getStoredValue(Op); + SDValue PackStride = getLoadStoreStride(Op, CDAG); + + unsigned ChainResIdx = PackData ? 0 : 1; + + SDValue PartOps[2]; + + SDValue UpperPartAVL; // we will use this for packing things back together + for (PackElem Part : {PackElem::Hi, PackElem::Lo}) { + // VP ops already have an explicit mask and AVL. When expanding from non-VP + // attach those additional inputs here. + auto SplitTM = CDAG.getTargetSplitMask(PackedMask, PackedAVL, Part); + + // Keep track of the (higher) lvl. 
+ if (Part == PackElem::Hi) + UpperPartAVL = SplitTM.AVL; + + // Attach non-predicating value operands + SmallVector OpVec; + + // Chain + OpVec.push_back(getNodeChain(Op)); + + // Data + if (PackData) { + SDValue PartData = + CDAG.getUnpack(SplitDataVT, PackData, Part, SplitTM.AVL); + OpVec.push_back(PartData); + } + + // Ptr & Stride + // Push (ptr + ElemBytes * , 2 * ElemBytes) + // Stride info + // EVT DataVT = LegalizeVectorType(getMemoryDataVT(Op), Op, DAG, Mode); + OpVec.push_back(CDAG.getSplitPtrOffset(PackPtr, PackStride, Part)); + OpVec.push_back(CDAG.getSplitPtrStride(PackStride)); + + // Add predicating args and generate part node + OpVec.push_back(SplitTM.Mask); + OpVec.push_back(SplitTM.AVL); + + if (PackData) { + // Store + PartOps[(int)Part] = CDAG.getNode(VVPOC, MVT::Other, OpVec); + } else { + // Load + PartOps[(int)Part] = + CDAG.getNode(VVPOC, {SplitDataVT, MVT::Other}, OpVec); + } + } + + // Merge the chains + SDValue LowChain = SDValue(PartOps[(int)PackElem::Lo].getNode(), ChainResIdx); + SDValue HiChain = SDValue(PartOps[(int)PackElem::Hi].getNode(), ChainResIdx); + SDValue FusedChains = + CDAG.getNode(ISD::TokenFactor, MVT::Other, {LowChain, HiChain}); + + // Chain only [store] + if (PackData) + return FusedChains; + + // Re-pack into full packed vector result + MVT PackedVT = + getLegalVectorType(Packing::Dense, DataVT.getVectorElementType()); + SDValue PackedVals = CDAG.getPack(PackedVT, PartOps[(int)PackElem::Lo], + PartOps[(int)PackElem::Hi], UpperPartAVL); + + return CDAG.getMergeValues({PackedVals, FusedChains}); +} + +SDValue VETargetLowering::legalizeInternalLoadStoreOp(SDValue Op, + VECustomDAG &CDAG) const { + LLVM_DEBUG(dbgs() << "::legalizeInternalLoadStoreOp\n";); + MVT DataVT = getIdiomaticVectorType(Op.getNode())->getSimpleVT(); + + // TODO: Recognize packable load,store. 
+ if (isPackedVectorType(DataVT)) + return splitPackedLoadStore(Op, CDAG); + + return legalizePackedAVL(Op, CDAG); +} + SDValue VETargetLowering::legalizeInternalVectorOp(SDValue Op, SelectionDAG &DAG) const { + LLVM_DEBUG(dbgs() << "::legalizeInternalVectorOp\n";); VECustomDAG CDAG(DAG, Op); + // Dispatch to specialized legalization functions. + switch (Op->getOpcode()) { + case VEISD::VVP_LOAD: + case VEISD::VVP_STORE: + return legalizeInternalLoadStoreOp(Op, CDAG); + } + EVT IdiomVT = Op.getValueType(); if (isPackedVectorType(IdiomVT) && !supportsPackedMode(Op.getOpcode(), IdiomVT)) @@ -229,7 +334,8 @@ // Half and round up EVL for 32bit element types. SDValue LegalAVL = AVL; - if (isPackedVectorType(Op.getValueType())) { + MVT IdiomVT = getIdiomaticVectorType(Op.getNode())->getSimpleVT(); + if (isPackedVectorType(IdiomVT)) { assert(maySafelyIgnoreMask(Op) && "TODO Shift predication from EVL into Mask"); diff --git a/llvm/lib/Transforms/Coroutines/CoroElide.cpp b/llvm/lib/Transforms/Coroutines/CoroElide.cpp --- a/llvm/lib/Transforms/Coroutines/CoroElide.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroElide.cpp @@ -103,21 +103,12 @@ // Given a resume function @f.resume(%f.frame* %frame), returns the size // and expected alignment of %f.frame type. -static std::pair getFrameLayout(Function *Resume) { - // Prefer to pull information from the function attributes. +static Optional> getFrameLayout(Function *Resume) { + // Pull information from the function attributes. auto Size = Resume->getParamDereferenceableBytes(0); - auto Align = Resume->getParamAlign(0); - - // If those aren't given, extract them from the type. 
- if (Size == 0 || !Align) { - auto *FrameTy = Resume->arg_begin()->getType()->getPointerElementType(); - - const DataLayout &DL = Resume->getParent()->getDataLayout(); - if (!Size) Size = DL.getTypeAllocSize(FrameTy); - if (!Align) Align = DL.getABITypeAlign(FrameTy); - } - - return std::make_pair(Size, *Align); + if (!Size) + return None; + return std::make_pair(Size, Resume->getParamAlign(0).valueOrOne()); } // Finds first non alloca instruction in the entry block of a function. @@ -361,17 +352,19 @@ replaceWithConstant(DestroyAddrConstant, It.second); if (ShouldElide) { - auto FrameSizeAndAlign = getFrameLayout(cast(ResumeAddrConstant)); - elideHeapAllocations(CoroId->getFunction(), FrameSizeAndAlign.first, - FrameSizeAndAlign.second, AA); - coro::replaceCoroFree(CoroId, /*Elide=*/true); - NumOfCoroElided++; + if (auto FrameSizeAndAlign = + getFrameLayout(cast(ResumeAddrConstant))) { + elideHeapAllocations(CoroId->getFunction(), FrameSizeAndAlign->first, + FrameSizeAndAlign->second, AA); + coro::replaceCoroFree(CoroId, /*Elide=*/true); + NumOfCoroElided++; #ifndef NDEBUG - if (!CoroElideInfoOutputFilename.empty()) - *getOrCreateLogFile() - << "Elide " << CoroId->getCoroutine()->getName() << " in " - << CoroId->getFunction()->getName() << "\n"; + if (!CoroElideInfoOutputFilename.empty()) + *getOrCreateLogFile() + << "Elide " << CoroId->getCoroutine()->getName() << " in " + << CoroId->getFunction()->getName() << "\n"; #endif + } } return true; diff --git a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp --- a/llvm/lib/Transforms/Coroutines/CoroFrame.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroFrame.cpp @@ -1079,7 +1079,7 @@ DBuilder.insertDeclare(Shape.FramePtr, FrameDIVar, DBuilder.createExpression(), DILoc, - Shape.FramePtr->getNextNode()); + Shape.getInsertPtAfterFramePtr()); } // Build a struct that will keep state for an active coroutine. 
@@ -1523,7 +1523,7 @@ LLVMContext &C = CB->getContext(); IRBuilder<> Builder(C); StructType *FrameTy = Shape.FrameTy; - Instruction *FramePtr = Shape.FramePtr; + Value *FramePtr = Shape.FramePtr; DominatorTree DT(*CB->getFunction()); SmallDenseMap DbgPtrAllocaCache; @@ -1576,7 +1576,7 @@ // For arguments, we will place the store instruction right after // the coroutine frame pointer instruction, i.e. bitcast of // coro.begin from i8* to %f.frame*. - InsertPt = FramePtr->getNextNode(); + InsertPt = Shape.getInsertPtAfterFramePtr(); // If we're spilling an Argument, make sure we clear 'nocapture' // from the coroutine function. @@ -1593,7 +1593,7 @@ if (!DT.dominates(CB, I)) { // If it is not dominated by CoroBegin, then spill should be // inserted immediately after CoroFrame is computed. - InsertPt = FramePtr->getNextNode(); + InsertPt = Shape.getInsertPtAfterFramePtr(); } else if (auto *II = dyn_cast(I)) { // If we are spilling the result of the invoke instruction, split // the normal edge and insert the spill in the new block. 
@@ -1686,10 +1686,10 @@ } } - BasicBlock *FramePtrBB = FramePtr->getParent(); + BasicBlock *FramePtrBB = Shape.getInsertPtAfterFramePtr()->getParent(); - auto SpillBlock = - FramePtrBB->splitBasicBlock(FramePtr->getNextNode(), "AllocaSpillBB"); + auto SpillBlock = FramePtrBB->splitBasicBlock( + Shape.getInsertPtAfterFramePtr(), "AllocaSpillBB"); SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill"); Shape.AllocaSpillBlock = SpillBlock; @@ -1739,7 +1739,7 @@ for (Instruction *I : UsersToUpdate) I->replaceUsesOfWith(Alloca, G); } - Builder.SetInsertPoint(FramePtr->getNextNode()); + Builder.SetInsertPoint(Shape.getInsertPtAfterFramePtr()); for (const auto &A : FrameData.Allocas) { AllocaInst *Alloca = A.Alloca; if (A.MayWriteBeforeCoroBegin) { diff --git a/llvm/lib/Transforms/Coroutines/CoroInternal.h b/llvm/lib/Transforms/Coroutines/CoroInternal.h --- a/llvm/lib/Transforms/Coroutines/CoroInternal.h +++ b/llvm/lib/Transforms/Coroutines/CoroInternal.h @@ -128,7 +128,7 @@ StructType *FrameTy; Align FrameAlign; uint64_t FrameSize; - Instruction *FramePtr; + Value *FramePtr; BasicBlock *AllocaSpillBlock; /// This would only be true if optimization are enabled. @@ -267,6 +267,12 @@ return nullptr; } + Instruction *getInsertPtAfterFramePtr() const { + if (auto *I = dyn_cast(FramePtr)) + return I->getNextNode(); + return &cast(FramePtr)->getParent()->getEntryBlock().front(); + } + /// Allocate memory according to the rules of the active lowering. 
/// /// \param CG - if non-null, will be updated for the new call diff --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp --- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp +++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp @@ -1152,7 +1152,8 @@ Function *DestroyFn, Function *CleanupFn) { assert(Shape.ABI == coro::ABI::Switch); - IRBuilder<> Builder(Shape.FramePtr->getNextNode()); + IRBuilder<> Builder(Shape.getInsertPtAfterFramePtr()); + auto *ResumeAddr = Builder.CreateStructGEP( Shape.FrameTy, Shape.FramePtr, coro::Shape::SwitchFieldIndex::Resume, "resume.addr"); @@ -1663,7 +1664,7 @@ // Map all uses of llvm.coro.begin to the allocated frame pointer. { // Make sure we don't invalidate Shape.FramePtr. - TrackingVH Handle(Shape.FramePtr); + TrackingVH Handle(Shape.FramePtr); Shape.CoroBegin->replaceAllUsesWith(FramePtr); Shape.FramePtr = Handle.getValPtr(); } @@ -1775,7 +1776,7 @@ // Map all uses of llvm.coro.begin to the allocated frame pointer. { // Make sure we don't invalidate Shape.FramePtr. 
- TrackingVH Handle(Shape.FramePtr); + TrackingVH Handle(Shape.FramePtr); Shape.CoroBegin->replaceAllUsesWith(RawFramePtr); Shape.FramePtr = Handle.getValPtr(); } diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -255,17 +255,24 @@ bool AA::isValidAtPosition(const Value &V, const Instruction &CtxI, InformationCache &InfoCache) { - if (isa(V)) + if (isa(V) || &V == &CtxI) return true; const Function *Scope = CtxI.getFunction(); if (auto *A = dyn_cast(&V)) return A->getParent() == Scope; - if (auto *I = dyn_cast(&V)) + if (auto *I = dyn_cast(&V)) { if (I->getFunction() == Scope) { - const DominatorTree *DT = - InfoCache.getAnalysisResultForFunction(*Scope); - return DT && DT->dominates(I, &CtxI); + if (const DominatorTree *DT = + InfoCache.getAnalysisResultForFunction( + *Scope)) + return DT->dominates(I, &CtxI); + // Local dominance check mostly for the old PM passes. 
+ if (I->getParent() == CtxI.getParent()) + return llvm::any_of( + make_range(I->getIterator(), I->getParent()->end()), + [&](const Instruction &AfterI) { return &AfterI == &CtxI; }); } + } return false; } @@ -315,22 +322,32 @@ return nullptr; } -bool AA::getPotentialCopiesOfStoredValue( - Attributor &A, StoreInst &SI, SmallSetVector &PotentialCopies, - const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation) { - - Value &Ptr = *SI.getPointerOperand(); +template +static bool +getPotentialCopiesOfMemoryValue(Attributor &A, Ty &I, + SmallSetVector &PotentialCopies, + const AbstractAttribute &QueryingAA, + bool &UsedAssumedInformation, bool OnlyExact) { + LLVM_DEBUG(dbgs() << "Trying to determine the potential copies of " << I + << " (only exact: " << OnlyExact << ")\n";); + + Value &Ptr = *I.getPointerOperand(); SmallVector Objects; - if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, QueryingAA, &SI, + if (!AA::getAssumedUnderlyingObjects(A, Ptr, Objects, QueryingAA, &I, UsedAssumedInformation)) { LLVM_DEBUG( dbgs() << "Underlying objects stored into could not be determined\n";); return false; } + // Containers to remember the pointer infos and new copies while we are not + // sure that we can find all of them. If we abort we want to avoid spurious + // dependences and potential copies in the provided container. SmallVector PIs; SmallVector NewCopies; + const auto *TLI = + A.getInfoCache().getTargetLibraryInfoForFunction(*I.getFunction()); for (Value *Obj : Objects) { LLVM_DEBUG(dbgs() << "Visit underlying object " << *Obj << "\n"); if (isa(Obj)) @@ -338,7 +355,7 @@ if (isa(Obj)) { // A null pointer access can be undefined but any offset from null may // be OK. We do not try to optimize the latter. 
- if (!NullPointerIsDefined(SI.getFunction(), + if (!NullPointerIsDefined(I.getFunction(), Ptr.getType()->getPointerAddressSpace()) && A.getAssumedSimplified(Ptr, QueryingAA, UsedAssumedInformation) == Obj) @@ -347,8 +364,9 @@ dbgs() << "Underlying object is a valid nullptr, giving up.\n";); return false; } + // TODO: Use assumed noalias return. if (!isa(Obj) && !isa(Obj) && - !isNoAliasCall(Obj)) { + !(IsLoad ? isAllocationFn(Obj, TLI) : isNoAliasCall(Obj))) { LLVM_DEBUG(dbgs() << "Underlying object is not supported yet: " << *Obj << "\n";); return false; @@ -361,23 +379,54 @@ return false; } + if (IsLoad) { + Value *InitialValue = AA::getInitialValueForObj(*Obj, *I.getType(), TLI); + if (!InitialValue) + return false; + NewCopies.push_back(InitialValue); + } + auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) { - if (!Acc.isRead()) + if ((IsLoad && !Acc.isWrite()) || (!IsLoad && !Acc.isRead())) return true; - auto *LI = dyn_cast(Acc.getRemoteInst()); - if (!LI) { - LLVM_DEBUG(dbgs() << "Underlying object read through a non-load " - "instruction not supported yet: " - << *Acc.getRemoteInst() << "\n";); + if (OnlyExact && !IsExact) { + LLVM_DEBUG(dbgs() << "Non exact access " << *Acc.getRemoteInst() + << ", abort!\n"); return false; } - NewCopies.push_back(LI); + if (IsLoad) { + assert(isa(I) && "Expected load or store instruction only!"); + if (Acc.isWrittenValueYetUndetermined()) + return true; + if (!Acc.isWrittenValueUnknown()) { + NewCopies.push_back(Acc.getWrittenValue()); + return true; + } + auto *SI = dyn_cast(Acc.getRemoteInst()); + if (!SI) { + LLVM_DEBUG(dbgs() << "Underlying object written through a non-store " + "instruction not supported yet: " + << *Acc.getRemoteInst() << "\n";); + return false; + } + NewCopies.push_back(SI->getValueOperand()); + } else { + assert(isa(I) && "Expected load or store instruction only!"); + auto *LI = dyn_cast(Acc.getRemoteInst()); + if (!LI && OnlyExact) { + LLVM_DEBUG(dbgs() << "Underlying object 
read through a non-load " + "instruction not supported yet: " + << *Acc.getRemoteInst() << "\n";); + return false; + } + NewCopies.push_back(Acc.getRemoteInst()); + } return true; }; auto &PI = A.getAAFor(QueryingAA, IRPosition::value(*Obj), DepClassTy::NONE); - if (!PI.forallInterferingAccesses(SI, CheckAccess)) { + if (!PI.forallInterferingAccesses(A, QueryingAA, I, CheckAccess)) { LLVM_DEBUG( dbgs() << "Failed to verify all interfering accesses for underlying object: " @@ -387,6 +436,9 @@ PIs.push_back(&PI); } + // Only if we were successful collection all potential copies we record + // dependences (on non-fix AAPointerInfo AAs). We also only then modify the + // given PotentialCopies container. for (auto *PI : PIs) { if (!PI->getState().isAtFixpoint()) UsedAssumedInformation = true; @@ -397,6 +449,23 @@ return true; } +bool AA::getPotentiallyLoadedValues(Attributor &A, LoadInst &LI, + SmallSetVector &PotentialValues, + const AbstractAttribute &QueryingAA, + bool &UsedAssumedInformation, + bool OnlyExact) { + return getPotentialCopiesOfMemoryValue( + A, LI, PotentialValues, QueryingAA, UsedAssumedInformation, OnlyExact); +} + +bool AA::getPotentialCopiesOfStoredValue( + Attributor &A, StoreInst &SI, SmallSetVector &PotentialCopies, + const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation, + bool OnlyExact) { + return getPotentialCopiesOfMemoryValue( + A, SI, PotentialCopies, QueryingAA, UsedAssumedInformation, OnlyExact); +} + static bool isAssumedReadOnlyOrReadNone(Attributor &A, const IRPosition &IRP, const AbstractAttribute &QueryingAA, bool RequireReadNone, bool &IsKnown) { @@ -1472,30 +1541,24 @@ } bool Attributor::checkForAllInstructions(function_ref Pred, + const Function *Fn, const AbstractAttribute &QueryingAA, const ArrayRef &Opcodes, bool &UsedAssumedInformation, bool CheckBBLivenessOnly, bool CheckPotentiallyDead) { - - const IRPosition &IRP = QueryingAA.getIRPosition(); // Since we need to provide instructions we have to have an exact 
definition. - const Function *AssociatedFunction = IRP.getAssociatedFunction(); - if (!AssociatedFunction) - return false; - - if (AssociatedFunction->isDeclaration()) + if (!Fn || Fn->isDeclaration()) return false; // TODO: use the function scope once we have call site AAReturnedValues. - const IRPosition &QueryIRP = IRPosition::function(*AssociatedFunction); + const IRPosition &QueryIRP = IRPosition::function(*Fn); const auto *LivenessAA = (CheckBBLivenessOnly || CheckPotentiallyDead) ? nullptr : &(getAAFor(QueryingAA, QueryIRP, DepClassTy::NONE)); - auto &OpcodeInstMap = - InfoCache.getOpcodeInstMapForFunction(*AssociatedFunction); + auto &OpcodeInstMap = InfoCache.getOpcodeInstMapForFunction(*Fn); if (!checkForAllInstructionsImpl(this, OpcodeInstMap, Pred, &QueryingAA, LivenessAA, Opcodes, UsedAssumedInformation, CheckBBLivenessOnly, CheckPotentiallyDead)) @@ -1504,6 +1567,19 @@ return true; } +bool Attributor::checkForAllInstructions(function_ref Pred, + const AbstractAttribute &QueryingAA, + const ArrayRef &Opcodes, + bool &UsedAssumedInformation, + bool CheckBBLivenessOnly, + bool CheckPotentiallyDead) { + const IRPosition &IRP = QueryingAA.getIRPosition(); + const Function *AssociatedFunction = IRP.getAssociatedFunction(); + return checkForAllInstructions(Pred, AssociatedFunction, QueryingAA, Opcodes, + UsedAssumedInformation, CheckBBLivenessOnly, + CheckPotentiallyDead); +} + bool Attributor::checkForAllReadWriteInstructions( function_ref Pred, AbstractAttribute &QueryingAA, bool &UsedAssumedInformation) { diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -73,11 +73,11 @@ cl::location(llvm::PotentialConstantIntValuesState::MaxPotentialValues), cl::init(7)); -static cl::opt - MaxInterferingWrites("attributor-max-interfering-writes", cl::Hidden, - cl::desc("Maximum number of 
interfering writes to " - "check before assuming all might interfere."), - cl::init(6)); +static cl::opt MaxInterferingAccesses( + "attributor-max-interfering-accesses", cl::Hidden, + cl::desc("Maximum number of interfering accesses to " + "check before assuming all might interfere."), + cl::init(6)); STATISTIC(NumAAs, "Number of abstract attributes created"); @@ -400,6 +400,31 @@ } } + if (auto *LI = dyn_cast(V)) { + bool UsedAssumedInformation = false; + SmallSetVector PotentialCopies; + if (AA::getPotentiallyLoadedValues(A, *LI, PotentialCopies, QueryingAA, + UsedAssumedInformation, + /* OnlyExact */ true)) { + // Values have to be dynamically unique or we loose the fact that a + // single llvm::Value might represent two runtime values (e.g., stack + // locations in different recursive calls). + bool DynamicallyUnique = + llvm::all_of(PotentialCopies, [&A, &QueryingAA](Value *PC) { + return AA::isDynamicallyUnique(A, QueryingAA, *PC); + }); + if (DynamicallyUnique && + (!Intraprocedural || !CtxI || + llvm::all_of(PotentialCopies, [CtxI](Value *PC) { + return AA::isValidInScope(*PC, CtxI->getFunction()); + }))) { + for (auto *PotentialCopy : PotentialCopies) + Worklist.push_back({PotentialCopy, CtxI}); + continue; + } + } + } + // Once a leaf is reached we inform the user through the callback. 
if (!VisitValueCB(*V, CtxI, State, Iteration > 1)) { LLVM_DEBUG(dbgs() << "Generic value traversal visit callback failed for: " @@ -440,10 +465,11 @@ return true; } -const Value *stripAndAccumulateMinimalOffsets( - Attributor &A, const AbstractAttribute &QueryingAA, const Value *Val, - const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, - bool UseAssumed = false) { +static const Value * +stripAndAccumulateOffsets(Attributor &A, const AbstractAttribute &QueryingAA, + const Value *Val, const DataLayout &DL, APInt &Offset, + bool GetMinOffset, bool AllowNonInbounds, + bool UseAssumed = false) { auto AttributorAnalysis = [&](Value &V, APInt &ROffset) -> bool { const IRPosition &Pos = IRPosition::value(V); @@ -454,14 +480,20 @@ : DepClassTy::NONE); ConstantRange Range = UseAssumed ? ValueConstantRangeAA.getAssumed() : ValueConstantRangeAA.getKnown(); + if (Range.isFullSet()) + return false; + // We can only use the lower part of the range because the upper part can // be higher than what the value can really be. - ROffset = Range.getSignedMin(); + if (GetMinOffset) + ROffset = Range.getSignedMin(); + else + ROffset = Range.getSignedMax(); return true; }; return Val->stripAndAccumulateConstantOffsets(DL, Offset, AllowNonInbounds, - /* AllowInvariant */ false, + /* AllowInvariant */ true, AttributorAnalysis); } @@ -470,8 +502,9 @@ const Value *Ptr, int64_t &BytesOffset, const DataLayout &DL, bool AllowNonInbounds = false) { APInt OffsetAPInt(DL.getIndexTypeSizeInBits(Ptr->getType()), 0); - const Value *Base = stripAndAccumulateMinimalOffsets( - A, QueryingAA, Ptr, DL, OffsetAPInt, AllowNonInbounds); + const Value *Base = + stripAndAccumulateOffsets(A, QueryingAA, Ptr, DL, OffsetAPInt, + /* GetMinOffset */ true, AllowNonInbounds); BytesOffset = OffsetAPInt.getSExtValue(); return Base; @@ -679,7 +712,6 @@ return clampStateAndIndicateChange(S, AA.getState()); } }; -} // namespace /// Helper function to accumulate uses. 
template @@ -791,6 +823,7 @@ S += ParentState; } } +} // namespace /// ------------------------ PointerInfo --------------------------------------- @@ -1051,6 +1084,7 @@ BooleanState BS; }; +namespace { struct AAPointerInfoImpl : public StateWrapper { using BaseTy = StateWrapper; @@ -1079,22 +1113,12 @@ return State::forallInterferingAccesses(OAS, CB); } bool forallInterferingAccesses( - LoadInst &LI, function_ref CB) - const override { - return State::forallInterferingAccesses(LI, CB); - } - bool forallInterferingAccesses( - StoreInst &SI, function_ref CB) - const override { - return State::forallInterferingAccesses(SI, CB); - } - bool forallInterferingWrites( - Attributor &A, const AbstractAttribute &QueryingAA, LoadInst &LI, + Attributor &A, const AbstractAttribute &QueryingAA, Instruction &I, function_ref UserCB) const override { SmallPtrSet DominatingWrites; - SmallVector, 8> InterferingWrites; + SmallVector, 8> InterferingAccesses; - Function &Scope = *LI.getFunction(); + Function &Scope = *I.getFunction(); const auto &NoSyncAA = A.getAAFor( QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL); const auto *ExecDomainAA = A.lookupAAFor( @@ -1122,13 +1146,15 @@ // TODO: Use inter-procedural reachability and dominance. const auto &NoRecurseAA = A.getAAFor( - QueryingAA, IRPosition::function(*LI.getFunction()), - DepClassTy::OPTIONAL); + QueryingAA, IRPosition::function(Scope), DepClassTy::OPTIONAL); - const bool CanUseCFGResoning = CanIgnoreThreading(LI); + const bool FindInterferingWrites = I.mayReadFromMemory(); + const bool FindInterferingReads = I.mayWriteToMemory(); + const bool UseDominanceReasoning = FindInterferingWrites; + const bool CanUseCFGResoning = CanIgnoreThreading(I); InformationCache &InfoCache = A.getInfoCache(); const DominatorTree *DT = - NoRecurseAA.isKnownNoRecurse() + NoRecurseAA.isKnownNoRecurse() && UseDominanceReasoning ? 
InfoCache.getAnalysisResultForFunction( Scope) : nullptr; @@ -1184,33 +1210,37 @@ } auto AccessCB = [&](const Access &Acc, bool Exact) { - if (!Acc.isWrite()) + if ((!FindInterferingWrites || !Acc.isWrite()) && + (!FindInterferingReads || !Acc.isRead())) return true; // For now we only filter accesses based on CFG reasoning which does not // work yet if we have threading effects, or the access is complicated. if (CanUseCFGResoning) { - if (!AA::isPotentiallyReachable(A, *Acc.getLocalInst(), LI, QueryingAA, - IsLiveInCalleeCB)) + if ((!Acc.isWrite() || + !AA::isPotentiallyReachable(A, *Acc.getLocalInst(), I, QueryingAA, + IsLiveInCalleeCB)) && + (!Acc.isRead() || + !AA::isPotentiallyReachable(A, I, *Acc.getLocalInst(), QueryingAA, + IsLiveInCalleeCB))) return true; - if (DT && Exact && - (Acc.getLocalInst()->getFunction() == LI.getFunction()) && + if (DT && Exact && (Acc.getLocalInst()->getFunction() == &Scope) && IsSameThreadAsLoad(Acc)) { - if (DT->dominates(Acc.getLocalInst(), &LI)) + if (DT->dominates(Acc.getLocalInst(), &I)) DominatingWrites.insert(&Acc); } } - InterferingWrites.push_back({&Acc, Exact}); + InterferingAccesses.push_back({&Acc, Exact}); return true; }; - if (!State::forallInterferingAccesses(LI, AccessCB)) + if (!State::forallInterferingAccesses(I, AccessCB)) return false; // If we cannot use CFG reasoning we only filter the non-write accesses // and are done here. if (!CanUseCFGResoning) { - for (auto &It : InterferingWrites) + for (auto &It : InterferingAccesses) if (!UserCB(*It.first, It.second)) return false; return true; @@ -1237,11 +1267,11 @@ return false; }; - // Run the user callback on all writes we cannot skip and return if that + // Run the user callback on all accesses we cannot skip and return if that // succeeded for all or not. 
- unsigned NumInterferingWrites = InterferingWrites.size(); - for (auto &It : InterferingWrites) { - if (!DT || NumInterferingWrites > MaxInterferingWrites || + unsigned NumInterferingAccesses = InterferingAccesses.size(); + for (auto &It : InterferingAccesses) { + if (!DT || NumInterferingAccesses > MaxInterferingAccesses || !CanSkipAccess(*It.first, It.second)) { if (!UserCB(*It.first, It.second)) return false; @@ -1573,7 +1603,7 @@ LengthVal = Length->getSExtValue(); Value &Ptr = getAssociatedValue(); unsigned ArgNo = getIRPosition().getCallSiteArgNo(); - ChangeStatus Changed; + ChangeStatus Changed = ChangeStatus::UNCHANGED; if (ArgNo == 0) { handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_WRITE, 0, Changed, nullptr, LengthVal); @@ -1616,9 +1646,11 @@ AAPointerInfoImpl::trackPointerInfoStatistics(getIRPosition()); } }; +} // namespace /// -----------------------NoUnwind Function Attribute-------------------------- +namespace { struct AANoUnwindImpl : AANoUnwind { AANoUnwindImpl(const IRPosition &IRP, Attributor &A) : AANoUnwind(IRP, A) {} @@ -1690,9 +1722,11 @@ /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nounwind); } }; +} // namespace /// --------------------- Function Return Values ------------------------------- +namespace { /// "Attribute" that collects all potential returned values and the return /// instructions that they arise from. /// @@ -1939,20 +1973,10 @@ /// See AbstractAttribute::trackStatistics() void trackStatistics() const override {} }; +} // namespace /// ------------------------ NoSync Function Attribute ------------------------- -struct AANoSyncImpl : AANoSync { - AANoSyncImpl(const IRPosition &IRP, Attributor &A) : AANoSync(IRP, A) {} - - const std::string getAsStr() const override { - return getAssumed() ? "nosync" : "may-sync"; - } - - /// See AbstractAttribute::updateImpl(...). 
- ChangeStatus updateImpl(Attributor &A) override; -}; - bool AANoSync::isNonRelaxedAtomic(const Instruction *I) { if (!I->isAtomic()) return false; @@ -1995,6 +2019,18 @@ return false; } +namespace { +struct AANoSyncImpl : AANoSync { + AANoSyncImpl(const IRPosition &IRP, Attributor &A) : AANoSync(IRP, A) {} + + const std::string getAsStr() const override { + return getAssumed() ? "nosync" : "may-sync"; + } + + /// See AbstractAttribute::updateImpl(...). + ChangeStatus updateImpl(Attributor &A) override; +}; + ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) { auto CheckRWInstForNoSync = [&](Instruction &I) { @@ -2057,9 +2093,11 @@ /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(nosync); } }; +} // namespace /// ------------------------ No-Free Attributes ---------------------------- +namespace { struct AANoFreeImpl : public AANoFree { AANoFreeImpl(const IRPosition &IRP, Attributor &A) : AANoFree(IRP, A) {} @@ -2241,8 +2279,10 @@ /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nofree) } }; +} // namespace /// ------------------------ NonNull Argument Attribute ------------------------ +namespace { static int64_t getKnownNonNullAndDerefBytesForUse( Attributor &A, const AbstractAttribute &QueryingAA, Value &AssociatedValue, const Use *U, const Instruction *I, bool &IsNonNull, bool &TrackUse) { @@ -2472,9 +2512,11 @@ /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nonnull) } }; +} // namespace /// ------------------------ No-Recurse Attributes ---------------------------- +namespace { struct AANoRecurseImpl : public AANoRecurse { AANoRecurseImpl(const IRPosition &IRP, Attributor &A) : AANoRecurse(IRP, A) {} @@ -2550,9 +2592,11 @@ /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(norecurse); } }; +} // namespace /// 
-------------------- Undefined-Behavior Attributes ------------------------ +namespace { struct AAUndefinedBehaviorImpl : public AAUndefinedBehavior { AAUndefinedBehaviorImpl(const IRPosition &IRP, Attributor &A) : AAUndefinedBehavior(IRP, A) {} @@ -2776,7 +2820,7 @@ case Instruction::AtomicRMW: return !AssumedNoUBInsts.count(I); case Instruction::Br: { - auto BrInst = cast(I); + auto *BrInst = cast(I); if (BrInst->isUnconditional()) return false; return !AssumedNoUBInsts.count(I); @@ -2877,9 +2921,11 @@ KnownUBInsts.size(); } }; +} // namespace /// ------------------------ Will-Return Attributes ---------------------------- +namespace { // Helper function that checks whether a function has any cycle which we don't // know if it is bounded or not. // Loops with maximum trip count are considered bounded, any other cycle not. @@ -3018,9 +3064,11 @@ /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(willreturn); } }; +} // namespace /// -------------------AAReachability Attribute-------------------------- +namespace { struct AAReachabilityImpl : AAReachability { AAReachabilityImpl(const IRPosition &IRP, Attributor &A) : AAReachability(IRP, A) {} @@ -3047,9 +3095,11 @@ /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_FN_ATTR(reachable); } }; +} // namespace /// ------------------------ NoAlias Argument Attribute ------------------------ +namespace { struct AANoAliasImpl : AANoAlias { AANoAliasImpl(const IRPosition &IRP, Attributor &A) : AANoAlias(IRP, A) { assert(getAssociatedType()->isPointerTy() && @@ -3423,9 +3473,11 @@ /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(noalias); } }; +} // namespace /// -------------------AAIsDead Function Attribute----------------------- +namespace { struct AAIsDeadValueImpl : public AAIsDead { AAIsDeadValueImpl(const IRPosition &IRP, Attributor &A) : 
AAIsDead(IRP, A) {} @@ -3452,7 +3504,7 @@ } /// See AbstractAttribute::getAsStr(). - const std::string getAsStr() const override { + virtual const std::string getAsStr() const override { return isAssumedDead() ? "assumed-dead" : "assumed-live"; } @@ -3538,6 +3590,15 @@ }); } + /// See AbstractAttribute::getAsStr(). + const std::string getAsStr() const override { + Instruction *I = dyn_cast(&getAssociatedValue()); + if (isa_and_nonnull(I)) + if (isValidState()) + return "assumed-dead-store"; + return AAIsDeadValueImpl::getAsStr(); + } + /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { Instruction *I = dyn_cast(&getAssociatedValue()); @@ -4144,9 +4205,11 @@ /// See AbstractAttribute::trackStatistics() void trackStatistics() const override {} }; +} // namespace /// -------------------- Dereferenceable Argument Attribute -------------------- +namespace { struct AADereferenceableImpl : AADereferenceable { AADereferenceableImpl(const IRPosition &IRP, Attributor &A) : AADereferenceable(IRP, A) {} @@ -4265,8 +4328,9 @@ unsigned IdxWidth = DL.getIndexSizeInBits(V.getType()->getPointerAddressSpace()); APInt Offset(IdxWidth, 0); - const Value *Base = - stripAndAccumulateMinimalOffsets(A, *this, &V, DL, Offset, false); + const Value *Base = stripAndAccumulateOffsets( + A, *this, &V, DL, Offset, /* GetMinOffset */ false, + /* AllowNonInbounds */ true); const auto &AA = A.getAAFor( *this, IRPosition::value(*Base), DepClassTy::REQUIRED); @@ -4381,9 +4445,11 @@ STATS_DECLTRACK_CS_ATTR(dereferenceable); } }; +} // namespace // ------------------------ Align Argument Attribute ------------------------ +namespace { static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA, Value &AssociatedValue, const Use *U, const Instruction *I, bool &TrackUse) { @@ -4455,13 +4521,7 @@ takeKnownMaximum(Attr.getValueAsInt()); Value &V = getAssociatedValue(); - // TODO: This is a HACK to avoid getPointerAlignment to introduce a ptr2int - // 
use of the function pointer. This was caused by D73131. We want to - // avoid this for function pointers especially because we iterate - // their uses and int2ptr is not handled. It is not a correctness - // problem though! - if (!V.getType()->getPointerElementType()->isFunctionTy()) - takeKnownMaximum(V.getPointerAlignment(A.getDataLayout()).value()); + takeKnownMaximum(V.getPointerAlignment(A.getDataLayout()).value()); if (getIRPosition().isFnInterfaceKind() && (!getAnchorScope() || @@ -4552,6 +4612,8 @@ auto VisitValueCB = [&](Value &V, const Instruction *, AAAlign::StateType &T, bool Stripped) -> bool { + if (isa(V) || isa(V)) + return true; const auto &AA = A.getAAFor(*this, IRPosition::value(V), DepClassTy::REQUIRED); if (!Stripped && this == &AA) { @@ -4559,6 +4621,7 @@ unsigned Alignment = 1; if (const Value *Base = GetPointerBaseWithConstantOffset(&V, Offset, DL)) { + // TODO: Use AAAlign for the base too. Align PA = Base->getPointerAlignment(DL); // BasePointerAddr + Offset = Alignment * Q for some integer Q. // So we can say that the maximum power of two which is a divisor of @@ -4690,8 +4753,10 @@ /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(align); } }; +} // namespace /// ------------------ Function No-Return Attribute ---------------------------- +namespace { struct AANoReturnImpl : public AANoReturn { AANoReturnImpl(const IRPosition &IRP, Attributor &A) : AANoReturn(IRP, A) {} @@ -4759,9 +4824,11 @@ /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(noreturn); } }; +} // namespace /// ----------------------- Variable Capturing --------------------------------- +namespace { /// A class to hold the state of for no-capture attributes. 
struct AANoCaptureImpl : public AANoCapture { AANoCaptureImpl(const IRPosition &IRP, Attributor &A) : AANoCapture(IRP, A) {} @@ -5214,6 +5281,7 @@ STATS_DECLTRACK_CSRET_ATTR(nocapture) } }; +} // namespace /// ------------------ Value Simplify Attribute ---------------------------- @@ -5234,6 +5302,7 @@ return true; } +namespace { struct AAValueSimplifyImpl : AAValueSimplify { AAValueSimplifyImpl(const IRPosition &IRP, Attributor &A) : AAValueSimplify(IRP, A) {} @@ -5413,7 +5482,7 @@ auto &PI = A.getAAFor(AA, IRPosition::value(*Obj), DepClassTy::REQUIRED); - if (!PI.forallInterferingWrites(A, AA, L, CheckAccess)) + if (!PI.forallInterferingAccesses(A, AA, L, CheckAccess)) return false; } return true; @@ -5432,15 +5501,6 @@ Attribute::StructRet, Attribute::Nest, Attribute::ByVal}, /* IgnoreSubsumingPositions */ true)) indicatePessimisticFixpoint(); - - // FIXME: This is a hack to prevent us from propagating function poiner in - // the new pass manager CGSCC pass as it creates call edges the - // CallGraphUpdater cannot handle yet. - Value &V = getAssociatedValue(); - if (V.getType()->isPointerTy() && - V.getType()->getPointerElementType()->isFunctionTy() && - !A.isModulePass()) - indicatePessimisticFixpoint(); } /// See AbstractAttribute::updateImpl(...). 
@@ -5539,6 +5599,11 @@ ChangeStatus manifest(Attributor &A) override { ChangeStatus Changed = ChangeStatus::UNCHANGED; + if (!A.isRunOn(*getAnchorScope())) + return Changed; + + assert(!hasCallBaseContext() && "Should never manifest a simplified " + "function return with call base context!"); if (auto *NewV = getReplacementValue(A)) { auto PredForReturned = @@ -5869,8 +5934,10 @@ STATS_DECLTRACK_CSARG_ATTR(value_simplify) } }; +} // namespace /// ----------------------- Heap-To-Stack Conversion --------------------------- +namespace { struct AAHeapToStackFunction final : public AAHeapToStack { struct AllocationInfo { @@ -5954,6 +6021,16 @@ /* CheckPotentiallyDead */ true); (void)Success; assert(Success && "Did not expect the call base visit callback to fail!"); + + Attributor::SimplifictionCallbackTy SCB = + [](const IRPosition &, const AbstractAttribute *, + bool &) -> Optional { return nullptr; }; + for (const auto &It : AllocationInfos) + A.registerSimplificationCallback(IRPosition::callsite_returned(*It.first), + SCB); + for (const auto &It : DeallocationInfos) + A.registerSimplificationCallback(IRPosition::callsite_returned(*It.first), + SCB); } const std::string getAsStr() const override { @@ -6413,8 +6490,10 @@ return Changed; } +} // namespace /// ----------------------- Privatizable Pointers ------------------------------ +namespace { struct AAPrivatizablePtrImpl : public AAPrivatizablePtr { AAPrivatizablePtrImpl(const IRPosition &IRP, Attributor &A) : AAPrivatizablePtr(IRP, A), PrivatizableType(llvm::None) {} @@ -7013,10 +7092,12 @@ STATS_DECLTRACK_FNRET_ATTR(privatizable_ptr); } }; +} // namespace /// -------------------- Memory Behavior Attributes ---------------------------- /// Includes read-none, read-only, and write-only. 
/// ---------------------------------------------------------------------------- +namespace { struct AAMemoryBehaviorImpl : public AAMemoryBehavior { AAMemoryBehaviorImpl(const IRPosition &IRP, Attributor &A) : AAMemoryBehavior(IRP, A) {} @@ -7516,6 +7597,7 @@ if (UserI->mayWriteToMemory()) removeAssumedBits(NO_WRITES); } +} // namespace /// -------------------- Memory Locations Attributes --------------------------- /// Includes read-none, argmemonly, inaccessiblememonly, @@ -7549,6 +7631,7 @@ return S; } +namespace { struct AAMemoryLocationImpl : public AAMemoryLocation { AAMemoryLocationImpl(const IRPosition &IRP, Attributor &A) @@ -8065,9 +8148,11 @@ STATS_DECLTRACK_CS_ATTR(readnone) } }; +} // namespace /// ------------------ Value Constant Range Attribute ------------------------- +namespace { struct AAValueConstantRangeImpl : AAValueConstantRange { using StateType = IntegerRangeState; AAValueConstantRangeImpl(const IRPosition &IRP, Attributor &A) @@ -8708,9 +8793,11 @@ STATS_DECLTRACK_CSARG_ATTR(value_range) } }; +} // namespace /// ------------------ Potential Values Attribute ------------------------- +namespace { struct AAPotentialValuesImpl : AAPotentialValues { using StateType = PotentialConstantIntValuesState; @@ -9895,8 +9982,10 @@ /// This is for instruction queries than scan "forward". 
DenseMap InstQueries; }; +} // namespace /// ---------------------- Assumption Propagation ------------------------------ +namespace { struct AAAssumptionInfoImpl : public AAAssumptionInfo { AAAssumptionInfoImpl(const IRPosition &IRP, Attributor &A, const DenseSet &Known) @@ -10030,6 +10119,7 @@ return Assumptions; } }; +} // namespace AACallGraphNode *AACallEdgeIterator::operator*() const { return static_cast(const_cast( diff --git a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp --- a/llvm/lib/Transforms/IPO/HotColdSplitting.cpp +++ b/llvm/lib/Transforms/IPO/HotColdSplitting.cpp @@ -352,7 +352,7 @@ // TODO: Pass BFI and BPI to update profile information. CodeExtractor CE(Region, &DT, /* AggregateArgs */ false, /* BFI */ nullptr, /* BPI */ nullptr, AC, /* AllowVarArgs */ false, - /* AllowAlloca */ false, + /* AllowAlloca */ false, /* AllocaBlock */ nullptr, /* Suffix */ "cold." + std::to_string(Count)); // Perform a simple cost/benefit analysis to decide whether or not to permit diff --git a/llvm/lib/Transforms/IPO/IROutliner.cpp b/llvm/lib/Transforms/IPO/IROutliner.cpp --- a/llvm/lib/Transforms/IPO/IROutliner.cpp +++ b/llvm/lib/Transforms/IPO/IROutliner.cpp @@ -2679,7 +2679,7 @@ OS->Candidate->getBasicBlocks(BlocksInRegion, BE); OS->CE = new (ExtractorAllocator.Allocate()) CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, - false, "outlined"); + false, nullptr, "outlined"); findAddInputsOutputs(M, *OS, NotSame); if (!OS->IgnoreRegion) OutlinedRegions.push_back(OS); @@ -2790,7 +2790,7 @@ OS->Candidate->getBasicBlocks(BlocksInRegion, BE); OS->CE = new (ExtractorAllocator.Allocate()) CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false, - false, "outlined"); + false, nullptr, "outlined"); bool FunctionOutlined = extractSection(*OS); if (FunctionOutlined) { unsigned StartIdx = OS->Candidate->getStartIdx(); diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp 
b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -2966,9 +2966,15 @@ auto &OMPInfoCache = static_cast(A.getInfoCache()); auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; + Attributor::SimplifictionCallbackTy SCB = + [](const IRPosition &, const AbstractAttribute *, + bool &) -> Optional { return nullptr; }; for (User *U : RFI.Declaration->users()) - if (CallBase *CB = dyn_cast(U)) + if (CallBase *CB = dyn_cast(U)) { MallocCalls.insert(CB); + A.registerSimplificationCallback(IRPosition::callsite_returned(*CB), + SCB); + } findPotentialRemovedFreeCalls(A); } diff --git a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp --- a/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp +++ b/llvm/lib/Transforms/Scalar/ConstraintElimination.cpp @@ -74,6 +74,9 @@ } void popLastConstraint(bool Signed) { getCS(Signed).popLastConstraint(); } + void popLastNVariables(bool Signed, unsigned N) { + getCS(Signed).popLastNVariables(N); + } }; /// Struct to express a pre-condition of the form %Op0 Pred %Op1. 
@@ -150,9 +153,9 @@ } if (auto *CI = dyn_cast(V)) { - if (CI->isNegative() || CI->uge(MaxConstraintValue)) + if (CI->uge(MaxConstraintValue)) return {}; - return {{CI->getSExtValue(), nullptr}}; + return {{CI->getZExtValue(), nullptr}}; } auto *GEP = dyn_cast(V); if (GEP && GEP->getNumOperands() == 2 && GEP->isInBounds()) { @@ -205,8 +208,9 @@ Value *Op1; ConstantInt *CI; - if (match(V, m_NUWAdd(m_Value(Op0), m_ConstantInt(CI)))) - return {{CI->getSExtValue(), nullptr}, {1, Op0}}; + if (match(V, m_NUWAdd(m_Value(Op0), m_ConstantInt(CI))) && + !CI->uge(MaxConstraintValue)) + return {{CI->getZExtValue(), nullptr}, {1, Op0}}; if (match(V, m_Add(m_Value(Op0), m_ConstantInt(CI))) && CI->isNegative()) { Preconditions.emplace_back( CmpInst::ICMP_UGE, Op0, @@ -371,11 +375,14 @@ Instruction *Condition; bool IsNot; bool IsSigned = false; + /// Variables that can be removed from the system once the stack entry gets + /// removed. + SmallVector ValuesToRelease; - StackEntry(unsigned NumIn, unsigned NumOut, Instruction *Condition, - bool IsNot, bool IsSigned) + StackEntry(unsigned NumIn, unsigned NumOut, CmpInst *Condition, bool IsNot, + bool IsSigned, SmallVector ValuesToRelease) : NumIn(NumIn), NumOut(NumOut), Condition(Condition), IsNot(IsNot), - IsSigned(IsSigned) {} + IsSigned(IsSigned), ValuesToRelease(ValuesToRelease) {} }; } // namespace @@ -407,6 +414,19 @@ continue; WorkList.emplace_back(DT.getNode(&BB)); + // Returns true if we can add a known condition from BB to its successor + // block Succ. Each predecessor of Succ can either be BB or be dominated by + // Succ (e.g. the case when adding a condition from a pre-header to a loop + // header). 
+ auto CanAdd = [&BB, &DT](BasicBlock *Succ) { + assert(isa(BB.getTerminator())); + return any_of(successors(&BB), + [Succ](const BasicBlock *S) { return S != Succ; }) && + all_of(predecessors(Succ), [&BB, &DT, Succ](BasicBlock *Pred) { + return Pred == &BB || DT.dominates(Succ, Pred); + }); + }; + // True as long as long as the current instruction is guaranteed to execute. bool GuaranteedToExecute = true; // Scan BB for assume calls. @@ -425,9 +445,12 @@ WorkList.emplace_back(DT.getNode(&BB), cast(Cond), false); } else { // Otherwise the condition only holds in the successors. - for (BasicBlock *Succ : successors(&BB)) + for (BasicBlock *Succ : successors(&BB)) { + if (!CanAdd(Succ)) + continue; WorkList.emplace_back(DT.getNode(Succ), cast(Cond), false); + } } } GuaranteedToExecute &= isGuaranteedToTransferExecutionToSuccessor(&I); @@ -437,18 +460,6 @@ if (!Br || !Br->isConditional()) continue; - // Returns true if we can add a known condition from BB to its successor - // block Succ. Each predecessor of Succ can either be BB or be dominated by - // Succ (e.g. the case when adding a condition from a pre-header to a loop - // header). - auto CanAdd = [&BB, &DT](BasicBlock *Succ) { - assert(isa(BB.getTerminator())); - return any_of(successors(&BB), - [Succ](const BasicBlock *S) { return S != Succ; }) && - all_of(predecessors(Succ), [&BB, &DT, Succ](BasicBlock *Pred) { - return Pred == &BB || DT.dominates(Succ, Pred); - }); - }; // If the condition is an OR of 2 compares and the false successor only has // the current block as predecessor, queue both negated conditions for the // false successor. @@ -512,8 +523,13 @@ break; LLVM_DEBUG(dbgs() << "Removing " << *E.Condition << " " << E.IsNot << "\n"); - DFSInStack.pop_back(); Info.popLastConstraint(E.IsSigned); + // Remove variables in the system that went out of scope. 
+ auto &Mapping = Info.getValue2Index(E.IsSigned); + for (Value *V : E.ValuesToRelease) + Mapping.erase(V); + Info.popLastNVariables(E.IsSigned, E.ValuesToRelease.size()); + DFSInStack.pop_back(); } LLVM_DEBUG({ @@ -603,10 +619,6 @@ if (!R.isValid(Info)) continue; - for (auto &KV : NewIndices) - Info.getValue2Index(CmpInst::isSigned(CB.Condition->getPredicate())) - .insert(KV); - LLVM_DEBUG(dbgs() << "Adding " << *CB.Condition << " " << CB.Not << "\n"); bool Added = false; assert(CmpInst::isSigned(CB.Condition->getPredicate()) == R.IsSigned && @@ -620,8 +632,11 @@ // If R has been added to the system, queue it for removal once it goes // out-of-scope. if (Added) { - for (auto &KV : NewIndices) + SmallVector ValuesToRelease; + for (auto &KV : NewIndices) { Info.getValue2Index(R.IsSigned).insert(KV); + ValuesToRelease.push_back(KV.first); + } LLVM_DEBUG({ dbgs() << " constraint: "; @@ -629,7 +644,7 @@ }); DFSInStack.emplace_back(CB.NumIn, CB.NumOut, CB.Condition, CB.Not, - R.IsSigned); + R.IsSigned, ValuesToRelease); if (R.IsEq) { // Also add the inverted constraint for equality constraints. @@ -638,7 +653,7 @@ CSToUse.addVariableRowFill(R.Coefficients); DFSInStack.emplace_back(CB.NumIn, CB.NumOut, CB.Condition, CB.Not, - R.IsSigned); + R.IsSigned, SmallVector()); } } } diff --git a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp --- a/llvm/lib/Transforms/Scalar/LoopPassManager.cpp +++ b/llvm/lib/Transforms/Scalar/LoopPassManager.cpp @@ -309,12 +309,12 @@ #ifndef NDEBUG // LoopAnalysisResults should always be valid. - // Note that we don't LAR.SE.verify() because that can change observed SE - // queries. See PR44815. 
if (VerifyDomInfo) LAR.DT.verify(); if (VerifyLoopInfo) LAR.LI.verify(LAR.DT); + if (VerifySCEV) + LAR.SE.verify(); if (LAR.MSSA && VerifyMemorySSA) LAR.MSSA->verifyMemorySSA(); #endif diff --git a/llvm/lib/Transforms/Scalar/LoopSink.cpp b/llvm/lib/Transforms/Scalar/LoopSink.cpp --- a/llvm/lib/Transforms/Scalar/LoopSink.cpp +++ b/llvm/lib/Transforms/Scalar/LoopSink.cpp @@ -319,6 +319,8 @@ // on B (A appears after B), A needs to be sinked first before B can be // sinked. for (Instruction &I : llvm::make_early_inc_range(llvm::reverse(*Preheader))) { + if (isa(&I)) + continue; // No need to check for instruction's operands are loop invariant. assert(L.hasLoopInvariantOperands(&I) && "Insts in a loop's preheader should have loop invariant operands!"); diff --git a/llvm/lib/Transforms/Utils/CodeExtractor.cpp b/llvm/lib/Transforms/Utils/CodeExtractor.cpp --- a/llvm/lib/Transforms/Utils/CodeExtractor.cpp +++ b/llvm/lib/Transforms/Utils/CodeExtractor.cpp @@ -246,9 +246,10 @@ bool AggregateArgs, BlockFrequencyInfo *BFI, BranchProbabilityInfo *BPI, AssumptionCache *AC, bool AllowVarArgs, bool AllowAlloca, - std::string Suffix) + BasicBlock *AllocationBlock, std::string Suffix) : DT(DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), AC(AC), AllowVarArgs(AllowVarArgs), + BPI(BPI), AC(AC), AllocationBlock(AllocationBlock), + AllowVarArgs(AllowVarArgs), Blocks(buildExtractionBlockSet(BBs, DT, AllowVarArgs, AllowAlloca)), Suffix(Suffix) {} @@ -257,7 +258,7 @@ BranchProbabilityInfo *BPI, AssumptionCache *AC, std::string Suffix) : DT(&DT), AggregateArgs(AggregateArgs || AggregateArgsOpt), BFI(BFI), - BPI(BPI), AC(AC), AllowVarArgs(false), + BPI(BPI), AC(AC), AllocationBlock(nullptr), AllowVarArgs(false), Blocks(buildExtractionBlockSet(L.getBlocks(), &DT, /* AllowVarArgs */ false, /* AllowAlloca */ false)), @@ -1189,9 +1190,10 @@ // Allocate a struct at the beginning of this function StructArgTy = StructType::get(newFunction->getContext(), ArgTypes); - 
Struct = new AllocaInst(StructArgTy, DL.getAllocaAddrSpace(), nullptr, - "structArg", - &codeReplacer->getParent()->front().front()); + Struct = new AllocaInst( + StructArgTy, DL.getAllocaAddrSpace(), nullptr, "structArg", + AllocationBlock ? &*AllocationBlock->getFirstInsertionPt() + : &codeReplacer->getParent()->front().front()); params.push_back(Struct); // Store aggregated inputs in the struct. diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -10356,8 +10356,8 @@ const std::string DebugLocStr = getDebugLocString(L); #endif /* NDEBUG */ - LLVM_DEBUG(dbgs() << "\nLV: Checking a loop in \"" - << L->getHeader()->getParent()->getName() << "\" from " + LLVM_DEBUG(dbgs() << "\nLV: Checking a loop in '" + << L->getHeader()->getParent()->getName() << "' from " << DebugLocStr << "\n"); LoopVectorizeHints Hints(L, InterleaveOnlyWhenForced, *ORE, TTI); diff --git a/llvm/test/Analysis/CostModel/AArch64/arith.ll b/llvm/test/Analysis/CostModel/AArch64/arith.ll --- a/llvm/test/Analysis/CostModel/AArch64/arith.ll +++ b/llvm/test/Analysis/CostModel/AArch64/arith.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -cost-model -analyze -mtriple=aarch64-linux-gnu < %s | FileCheck %s +; RUN: opt -passes='print' -cost-kind=throughput 2>&1 -disable-output -mtriple=aarch64-linux-gnu < %s | FileCheck %s define void @i1() { ; CHECK-LABEL: 'i1' diff --git a/llvm/test/Analysis/CostModel/AArch64/cast.ll b/llvm/test/Analysis/CostModel/AArch64/cast.ll --- a/llvm/test/Analysis/CostModel/AArch64/cast.ll +++ b/llvm/test/Analysis/CostModel/AArch64/cast.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt -cost-model -analyze -mtriple=aarch64-none-linux-gnueabi %s | FileCheck 
--check-prefixes=CHECK,CHECK-NOFP16 %s -; RUN: opt -cost-model -analyze -mtriple=aarch64-none-linux-gnueabi -mattr=+fullfp16 %s | FileCheck --check-prefixes=CHECK,CHECK-FP16 %s +; RUN: opt -passes='print' 2>&1 -disable-output -mtriple=aarch64-none-linux-gnueabi %s | FileCheck --check-prefixes=CHECK,CHECK-NOFP16 %s +; RUN: opt -passes='print' 2>&1 -disable-output -mtriple=aarch64-none-linux-gnueabi -mattr=+fullfp16 %s | FileCheck --check-prefixes=CHECK,CHECK-FP16 %s define void @ext() { ; CHECK-LABEL: 'ext' diff --git a/llvm/test/Analysis/CostModel/AArch64/min-max.ll b/llvm/test/Analysis/CostModel/AArch64/min-max.ll --- a/llvm/test/Analysis/CostModel/AArch64/min-max.ll +++ b/llvm/test/Analysis/CostModel/AArch64/min-max.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -cost-model -analyze -cost-kind=throughput | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16 -; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+fullfp16 -cost-model -analyze -cost-kind=throughput | FileCheck %s --check-prefixes=CHECK,CHECK-F16 +; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -passes='print' 2>&1 -disable-output -cost-kind=throughput | FileCheck %s --check-prefixes=CHECK,CHECK-NOF16 +; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+fullfp16 -passes='print' 2>&1 -disable-output -cost-kind=throughput | FileCheck %s --check-prefixes=CHECK,CHECK-F16 define void @umin() { ; CHECK-LABEL: 'umin' diff --git a/llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/gather-i16-with-i8-index.ll @@ -14,7 +14,7 @@ @B = global [1024 x i16] zeroinitializer, align 128 @C = global [1024 x i16] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: 
LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i16, i16* %inB, align 2 ; SSE2: LV: Found an estimated cost of 24 for VF 2 For instruction: %valB = load i16, i16* %inB, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/gather-i32-with-i8-index.ll @@ -14,7 +14,7 @@ @B = global [1024 x i32] zeroinitializer, align 128 @C = global [1024 x i32] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i32, i32* %inB, align 4 ; SSE2: LV: Found an estimated cost of 25 for VF 2 For instruction: %valB = load i32, i32* %inB, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/gather-i64-with-i8-index.ll @@ -14,7 +14,7 @@ @B = global [1024 x i64] zeroinitializer, align 128 @C = global [1024 x i64] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i64, i64* %inB, align 8 ; SSE2: LV: Found an estimated cost of 25 for VF 2 For instruction: %valB = load i64, i64* %inB, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/gather-i8-with-i8-index.ll @@ -14,7 +14,7 @@ @B = global [1024 x i8] zeroinitializer, align 128 @C = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; 
CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB = load i8, i8* %inB, align 1 ; SSE2: LV: Found an estimated cost of 25 for VF 2 For instruction: %valB = load i8, i8* %inB, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-2.ll @@ -10,7 +10,7 @@ @A = global [1024 x float] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, float* %in0, align 4 ; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, float* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-3.ll @@ -10,7 +10,7 @@ @A = global [1024 x float] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, float* %in0, align 4 ; SSE2: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load float, float* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-4.ll @@ -10,7 +10,7 @@ @A = global [1024 x float] 
zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, float* %in0, align 4 ; SSE2: LV: Found an estimated cost of 12 for VF 2 For instruction: %v0 = load float, float* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-6.ll @@ -10,7 +10,7 @@ @A = global [1024 x float] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, float* %in0, align 4 ; SSE2: LV: Found an estimated cost of 18 for VF 2 For instruction: %v0 = load float, float* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll @@ -10,7 +10,7 @@ @A = global [1024 x double] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, double* %in0, align 8 ; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load double, double* %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll --- 
a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll @@ -10,7 +10,7 @@ @A = global [1024 x double] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, double* %in0, align 8 ; SSE2: LV: Found an estimated cost of 9 for VF 2 For instruction: %v0 = load double, double* %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll @@ -10,7 +10,7 @@ @A = global [1024 x double] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, double* %in0, align 8 ; SSE2: LV: Found an estimated cost of 12 for VF 2 For instruction: %v0 = load double, double* %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll @@ -10,7 +10,7 @@ @A = global [1024 x double] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, double* %in0, align 8 ; SSE2: LV: Found an estimated cost of 18 for VF 2 For instruction: %v0 = load double, double* %in0, align 8 diff 
--git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-2.ll @@ -10,7 +10,7 @@ @A = global [1024 x i16] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, i16* %in0, align 2 ; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i16, i16* %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-3.ll @@ -10,7 +10,7 @@ @A = global [1024 x i16] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, i16* %in0, align 2 ; SSE2: LV: Found an estimated cost of 18 for VF 2 For instruction: %v0 = load i16, i16* %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-4.ll @@ -10,7 +10,7 @@ @A = global [1024 x i16] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, i16* %in0, align 2 ; SSE2: LV: 
Found an estimated cost of 17 for VF 2 For instruction: %v0 = load i16, i16* %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll @@ -10,7 +10,7 @@ @A = global [1024 x i16] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, i16* %in0, align 2 ; SSE2: LV: Found an estimated cost of 22 for VF 2 For instruction: %v0 = load i16, i16* %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-6.ll @@ -10,7 +10,7 @@ @A = global [1024 x i16] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, i16* %in0, align 2 ; SSE2: LV: Found an estimated cost of 26 for VF 2 For instruction: %v0 = load i16, i16* %in0, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2-indices-0u.ll @@ -10,7 +10,7 @@ @A = global [1024 x i32] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: 
Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-2.ll @@ -10,7 +10,7 @@ @A = global [1024 x i32] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; SSE2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-01u.ll @@ -10,7 +10,7 @@ @A = global [1024 x i32] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; SSE2: LV: Found an estimated cost of 14 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll +++ 
b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3-indices-0uu.ll @@ -10,7 +10,7 @@ @A = global [1024 x i32] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; SSE2: LV: Found an estimated cost of 8 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-3.ll @@ -10,7 +10,7 @@ @A = global [1024 x i32] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; SSE2: LV: Found an estimated cost of 21 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-012u.ll @@ -10,7 +10,7 @@ @A = global [1024 x i32] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; SSE2: LV: Found an estimated cost of 21 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git 
a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll @@ -10,7 +10,7 @@ @A = global [1024 x i32] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; SSE2: LV: Found an estimated cost of 14 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-0uuu.ll @@ -10,7 +10,7 @@ @A = global [1024 x i32] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; SSE2: LV: Found an estimated cost of 7 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4.ll @@ -10,7 +10,7 @@ @A = global [1024 x i32] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: 
Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; SSE2: LV: Found an estimated cost of 28 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-6.ll @@ -10,7 +10,7 @@ @A = global [1024 x i32] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, i32* %in0, align 4 ; SSE2: LV: Found an estimated cost of 42 for VF 2 For instruction: %v0 = load i32, i32* %in0, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll @@ -10,7 +10,7 @@ @A = global [1024 x i64] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, i64* %in0, align 8 ; SSE2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, i64* %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll @@ -10,7 +10,7 @@ @A = global [1024 x i64] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 
128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, i64* %in0, align 8 ; SSE2: LV: Found an estimated cost of 21 for VF 2 For instruction: %v0 = load i64, i64* %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll @@ -10,7 +10,7 @@ @A = global [1024 x i64] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, i64* %in0, align 8 ; SSE2: LV: Found an estimated cost of 28 for VF 2 For instruction: %v0 = load i64, i64* %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll @@ -10,7 +10,7 @@ @A = global [1024 x i64] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, i64* %in0, align 8 ; SSE2: LV: Found an estimated cost of 42 for VF 2 For instruction: %v0 = load i64, i64* %in0, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-2.ll @@ -10,7 +10,7 @@ @A = 
global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, i8* %in0, align 1 ; SSE2: LV: Found an estimated cost of 14 for VF 2 For instruction: %v0 = load i8, i8* %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-3.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, i8* %in0, align 1 ; SSE2: LV: Found an estimated cost of 23 for VF 2 For instruction: %v0 = load i8, i8* %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-4.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, i8* %in0, align 1 ; SSE2: LV: Found an estimated cost of 28 for VF 2 For instruction: %v0 = load i8, i8* %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll +++ 
b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-6.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, i8* %in0, align 1 ; SSE2: LV: Found an estimated cost of 47 for VF 2 For instruction: %v0 = load i8, i8* %in0, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-2.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x float] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v1, float* %out1, align 4 ; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store float %v1, float* %out1, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-3.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x float] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v2, float* %out2, align 4 ; SSE2: LV: Found an estimated cost of 11 for VF 2 For instruction: store float %v2, float* %out2, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll 
b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-4.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x float] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v3, float* %out3, align 4 ; SSE2: LV: Found an estimated cost of 12 for VF 2 For instruction: store float %v3, float* %out3, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f32-stride-6.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x float] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store float %v5, float* %out5, align 4 ; SSE2: LV: Found an estimated cost of 21 for VF 2 For instruction: store float %v5, float* %out5, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-2.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x double] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v1, double* %out1, align 8 ; SSE2: LV: Found an estimated cost of 6 for 
VF 2 For instruction: store double %v1, double* %out1, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-3.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x double] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v2, double* %out2, align 8 ; SSE2: LV: Found an estimated cost of 10 for VF 2 For instruction: store double %v2, double* %out2, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-4.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x double] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v3, double* %out3, align 8 ; SSE2: LV: Found an estimated cost of 12 for VF 2 For instruction: store double %v3, double* %out3, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-6.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x double] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; 
; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store double %v5, double* %out5, align 8 ; SSE2: LV: Found an estimated cost of 20 for VF 2 For instruction: store double %v5, double* %out5, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-2.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i16] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v1, i16* %out1, align 2 ; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %v1, i16* %out1, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-3.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i16] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v2, i16* %out2, align 2 ; SSE2: LV: Found an estimated cost of 16 for VF 2 For instruction: store i16 %v2, i16* %out2, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-4.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global 
[1024 x i16] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v3, i16* %out3, align 2 ; SSE2: LV: Found an estimated cost of 17 for VF 2 For instruction: store i16 %v3, i16* %out3, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-5.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i16] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v4, i16* %out4, align 2 ; SSE2: LV: Found an estimated cost of 22 for VF 2 For instruction: store i16 %v4, i16* %out4, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i16-stride-6.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i16] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %v5, i16* %out5, align 2 ; SSE2: LV: Found an estimated cost of 26 for VF 2 For instruction: store i16 %v5, i16* %out5, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll +++ 
b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-2.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i32] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v1, i32* %out1, align 4 ; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i32 %v1, i32* %out1, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-3.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i32] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v2, i32* %out2, align 4 ; SSE2: LV: Found an estimated cost of 23 for VF 2 For instruction: store i32 %v2, i32* %out2, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-4.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i32] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v3, i32* %out3, align 4 ; SSE2: LV: Found an estimated cost of 28 for VF 2 For instruction: store i32 %v3, i32* %out3, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll 
b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i32-stride-6.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i32] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %v5, i32* %out5, align 4 ; SSE2: LV: Found an estimated cost of 45 for VF 2 For instruction: store i32 %v5, i32* %out5, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-2.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i64] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v1, i64* %out1, align 8 ; SSE2: LV: Found an estimated cost of 14 for VF 2 For instruction: store i64 %v1, i64* %out1, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-3.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i64] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v2, i64* %out2, align 8 ; SSE2: LV: Found an estimated cost of 22 for VF 2 For instruction: store 
i64 %v2, i64* %out2, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-4.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i64] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v3, i64* %out3, align 8 ; SSE2: LV: Found an estimated cost of 28 for VF 2 For instruction: store i64 %v3, i64* %out3, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-6.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i64] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %v5, i64* %out5, align 8 ; SSE2: LV: Found an estimated cost of 44 for VF 2 For instruction: store i64 %v5, i64* %out5, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-2.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: 
store i8 %v1, i8* %out1, align 1 ; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %v1, i8* %out1, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-3.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v2, i8* %out2, align 1 ; SSE2: LV: Found an estimated cost of 25 for VF 2 For instruction: store i8 %v2, i8* %out2, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-4.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v3, i8* %out3, align 1 ; SSE2: LV: Found an estimated cost of 28 for VF 2 For instruction: store i8 %v3, i8* %out3, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll --- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll +++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i8-stride-6.ll @@ -10,7 +10,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a 
loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %v5, i8* %out5, align 1 ; SSE2: LV: Found an estimated cost of 49 for VF 2 For instruction: store i8 %v5, i8* %out5, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/masked-gather-i32-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/masked-gather-i32-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/masked-gather-i32-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-gather-i32-with-i8-index.ll @@ -14,7 +14,7 @@ @B = global [1024 x i32] zeroinitializer, align 128 @C = global [1024 x i32] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i32, i32* %inB, align 4 ; SSE2: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i32, i32* %inB, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-gather-i64-with-i8-index.ll @@ -14,7 +14,7 @@ @B = global [1024 x i64] zeroinitializer, align 128 @C = global [1024 x i64] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, i64* %inB, align 8 ; SSE2: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i64, i64* %inB, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll b/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll --- a/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-interleaved-load-i16.ll @@ -17,7 +17,7 @@ ; } ; (relates to 
the testcase in PR50566) -; DISABLED_MASKED_STRIDED: LV: Checking a loop in "test1" +; DISABLED_MASKED_STRIDED: LV: Checking a loop in 'test1' ; ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 @@ -34,7 +34,7 @@ ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 62 for VF 16 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 62 for VF 16 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Checking a loop in "test1" +; ENABLED_MASKED_STRIDED: LV: Checking a loop in 'test1' ; ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 @@ -84,7 +84,7 @@ ; y[i] = points[i*4 + 1]; ; } -; DISABLED_MASKED_STRIDED: LV: Checking a loop in "test2" +; DISABLED_MASKED_STRIDED: LV: Checking a loop in 'test2' ; ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 @@ -101,7 +101,7 @@ ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Checking a loop in "test2" +; ENABLED_MASKED_STRIDED: LV: Checking a loop in 'test2' ; ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i2 = load i16, i16* %arrayidx2, align 2 
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, i16* %arrayidx7, align 2 @@ -161,7 +161,7 @@ ; x[i] = points[i*3]; ; } -; DISABLED_MASKED_STRIDED: LV: Checking a loop in "test" +; DISABLED_MASKED_STRIDED: LV: Checking a loop in 'test' ; ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, i16* %arrayidx6, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %i4 = load i16, i16* %arrayidx6, align 2 @@ -169,7 +169,7 @@ ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 8 For instruction: %i4 = load i16, i16* %arrayidx6, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: %i4 = load i16, i16* %arrayidx6, align 2 -; ENABLED_MASKED_STRIDED: LV: Checking a loop in "test" +; ENABLED_MASKED_STRIDED: LV: Checking a loop in 'test' ; ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: %i4 = load i16, i16* %arrayidx6, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 7 for VF 2 For instruction: %i4 = load i16, i16* %arrayidx6, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll b/llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll --- a/llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll @@ -17,7 +17,7 @@ ; } ; (relates to the testcase in PR50566) -; DISABLED_MASKED_STRIDED: LV: Checking a loop in "test1" +; DISABLED_MASKED_STRIDED: LV: Checking a loop in 'test1' ; ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2 @@ -34,7 +34,7 @@ ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 
68 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 68 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Checking a loop in "test1" +; ENABLED_MASKED_STRIDED: LV: Checking a loop in 'test1' ; ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2 @@ -84,7 +84,7 @@ ; points[i*4 + 1] = y[i]; ; } -; DISABLED_MASKED_STRIDED: LV: Checking a loop in "test2" +; DISABLED_MASKED_STRIDED: LV: Checking a loop in 'test2' ; ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2 @@ -101,7 +101,7 @@ ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 50 for VF 16 For instruction: store i16 %0, i16* %arrayidx2, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 3000000 for VF 16 For instruction: store i16 %2, i16* %arrayidx7, align 2 -; ENABLED_MASKED_STRIDED: LV: Checking a loop in "test2" +; ENABLED_MASKED_STRIDED: LV: Checking a loop in 'test2' ; ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx2, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, i16* %arrayidx7, align 2 @@ -161,7 +161,7 @@ ; points[i*3] = x[i]; ; } -; DISABLED_MASKED_STRIDED: LV: Checking a loop in "test" +; DISABLED_MASKED_STRIDED: LV: Checking a loop in 'test' ; ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx6, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: 
store i16 %0, i16* %arrayidx6, align 2 @@ -169,7 +169,7 @@ ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 8 for VF 8 For instruction: store i16 %0, i16* %arrayidx6, align 2 ; DISABLED_MASKED_STRIDED: LV: Found an estimated cost of 20 for VF 16 For instruction: store i16 %0, i16* %arrayidx6, align 2 -; ENABLED_MASKED_STRIDED: LV: Checking a loop in "test" +; ENABLED_MASKED_STRIDED: LV: Checking a loop in 'test' ; ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, i16* %arrayidx6, align 2 ; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %0, i16* %arrayidx6, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/masked-load-i16.ll b/llvm/test/Analysis/CostModel/X86/masked-load-i16.ll --- a/llvm/test/Analysis/CostModel/X86/masked-load-i16.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-load-i16.ll @@ -13,7 +13,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @C = global [1024 x i16] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i16, i16* %inB, align 2 ; SSE2: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i16, i16* %inB, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/masked-load-i32.ll b/llvm/test/Analysis/CostModel/X86/masked-load-i32.ll --- a/llvm/test/Analysis/CostModel/X86/masked-load-i32.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-load-i32.ll @@ -13,7 +13,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @C = global [1024 x i32] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i32, i32* %inB, align 4 ; SSE2: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i32, i32* %inB, 
align 4 diff --git a/llvm/test/Analysis/CostModel/X86/masked-load-i64.ll b/llvm/test/Analysis/CostModel/X86/masked-load-i64.ll --- a/llvm/test/Analysis/CostModel/X86/masked-load-i64.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-load-i64.ll @@ -13,7 +13,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @C = global [1024 x i64] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i64, i64* %inB, align 8 ; SSE2: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i64, i64* %inB, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/masked-load-i8.ll b/llvm/test/Analysis/CostModel/X86/masked-load-i8.ll --- a/llvm/test/Analysis/CostModel/X86/masked-load-i8.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-load-i8.ll @@ -13,7 +13,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @C = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: %valB.loaded = load i8, i8* %inB, align 1 ; SSE2: LV: Found an estimated cost of 3000000 for VF 2 For instruction: %valB.loaded = load i8, i8* %inB, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/masked-scatter-i32-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/masked-scatter-i32-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/masked-scatter-i32-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-scatter-i32-with-i8-index.ll @@ -14,7 +14,7 @@ @B = global [1024 x i32] zeroinitializer, align 128 @C = global [1024 x i32] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4 ; SSE2: LV: Found an estimated cost of 2 for VF 2 For 
instruction: store i32 %valB, i32* %out, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/masked-scatter-i64-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/masked-scatter-i64-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/masked-scatter-i64-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-scatter-i64-with-i8-index.ll @@ -14,7 +14,7 @@ @B = global [1024 x i64] zeroinitializer, align 128 @C = global [1024 x i64] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8 ; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, i64* %out, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/masked-store-i16.ll b/llvm/test/Analysis/CostModel/X86/masked-store-i16.ll --- a/llvm/test/Analysis/CostModel/X86/masked-store-i16.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-store-i16.ll @@ -13,7 +13,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i16] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, i16* %out, align 2 ; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i16 %valB, i16* %out, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/masked-store-i32.ll b/llvm/test/Analysis/CostModel/X86/masked-store-i32.ll --- a/llvm/test/Analysis/CostModel/X86/masked-store-i32.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-store-i32.ll @@ -13,7 +13,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i32] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4 ; SSE2: LV: Found an estimated cost of 2 for 
VF 2 For instruction: store i32 %valB, i32* %out, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/masked-store-i64.ll b/llvm/test/Analysis/CostModel/X86/masked-store-i64.ll --- a/llvm/test/Analysis/CostModel/X86/masked-store-i64.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-store-i64.ll @@ -13,7 +13,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i64] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8 ; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i64 %valB, i64* %out, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/masked-store-i8.ll b/llvm/test/Analysis/CostModel/X86/masked-store-i8.ll --- a/llvm/test/Analysis/CostModel/X86/masked-store-i8.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-store-i8.ll @@ -13,7 +13,7 @@ @A = global [1024 x i8] zeroinitializer, align 128 @B = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, i8* %out, align 1 ; SSE2: LV: Found an estimated cost of 2 for VF 2 For instruction: store i8 %valB, i8* %out, align 1 diff --git a/llvm/test/Analysis/CostModel/X86/scatter-i16-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/scatter-i16-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/scatter-i16-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/scatter-i16-with-i8-index.ll @@ -14,7 +14,7 @@ @B = global [1024 x i16] zeroinitializer, align 128 @C = global [1024 x i16] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %valB, i16* %out, align 2 ; SSE2: LV: Found an estimated cost of 28 for VF 2 For instruction: 
store i16 %valB, i16* %out, align 2 diff --git a/llvm/test/Analysis/CostModel/X86/scatter-i32-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/scatter-i32-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/scatter-i32-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/scatter-i32-with-i8-index.ll @@ -14,7 +14,7 @@ @B = global [1024 x i32] zeroinitializer, align 128 @C = global [1024 x i32] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %valB, i32* %out, align 4 ; SSE2: LV: Found an estimated cost of 29 for VF 2 For instruction: store i32 %valB, i32* %out, align 4 diff --git a/llvm/test/Analysis/CostModel/X86/scatter-i64-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/scatter-i64-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/scatter-i64-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/scatter-i64-with-i8-index.ll @@ -14,7 +14,7 @@ @B = global [1024 x i64] zeroinitializer, align 128 @C = global [1024 x i64] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i64 %valB, i64* %out, align 8 ; SSE2: LV: Found an estimated cost of 29 for VF 2 For instruction: store i64 %valB, i64* %out, align 8 diff --git a/llvm/test/Analysis/CostModel/X86/scatter-i8-with-i8-index.ll b/llvm/test/Analysis/CostModel/X86/scatter-i8-with-i8-index.ll --- a/llvm/test/Analysis/CostModel/X86/scatter-i8-with-i8-index.ll +++ b/llvm/test/Analysis/CostModel/X86/scatter-i8-with-i8-index.ll @@ -14,7 +14,7 @@ @B = global [1024 x i8] zeroinitializer, align 128 @C = global [1024 x i8] zeroinitializer, align 128 -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: Checking a loop in 'test' ; ; SSE2: LV: Found an estimated cost of 1 for VF 1 For instruction: store i8 %valB, i8* %out, align 1 ; SSE2: LV: Found 
an estimated cost of 29 for VF 2 For instruction: store i8 %valB, i8* %out, align 1 diff --git a/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll b/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll --- a/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vshuffle.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -mtriple=arm64-apple-ios7.0 -mcpu=cyclone | FileCheck %s -; CHECK: test1 +; CHECK-LABEL: test1 ; CHECK: movi.16b v[[REG0:[0-9]+]], #0 define <8 x i1> @test1() { entry: @@ -14,16 +14,16 @@ ret <8 x i1> %Shuff } -; CHECK: lCPI1_0: -; CHECK: .byte 0 ; 0x0 -; CHECK: .byte 0 ; 0x0 +; CHECK-LABEL: lCPI1_0: ; CHECK: .byte 0 ; 0x0 +; CHECK: .space 1 ; CHECK: .byte 0 ; 0x0 +; CHECK: .space 1 ; CHECK: .byte 1 ; 0x1 ; CHECK: .byte 0 ; 0x0 ; CHECK: .byte 0 ; 0x0 ; CHECK: .byte 0 ; 0x0 -; CHECK: test2 +; CHECK-LABEL: test2 ; CHECK: adrp x[[REG2:[0-9]+]], lCPI1_0@PAGE ; CHECK: ldr d[[REG1:[0-9]+]], [x[[REG2]], lCPI1_0@PAGEOFF] define <8 x i1>@test2() { @@ -35,7 +35,7 @@ ret <8 x i1> %Shuff } -; CHECK: test3 +; CHECK-LABEL: test3 ; CHECK: movi.4s v{{[0-9]+}}, #1 define <16 x i1> @test3(i1* %ptr, i32 %v) { bb: @@ -45,7 +45,7 @@ i32 14, i32 0> ret <16 x i1> %Shuff } -; CHECK: lCPI3_0: +; CHECK-LABEL: lCPI3_0: ; CHECK: .byte 0 ; 0x0 ; CHECK: .byte 0 ; 0x0 ; CHECK: .byte 0 ; 0x0 @@ -62,7 +62,7 @@ ; CHECK: .byte 0 ; 0x0 ; CHECK: .byte 0 ; 0x0 ; CHECK: .byte 0 ; 0x0 -; CHECK: _test4: +; CHECK-LABEL: _test4: ; CHECK: adrp x[[REG3:[0-9]+]], lCPI3_0@PAGE ; CHECK: ldr q[[REG2:[0-9]+]], [x[[REG3]], lCPI3_0@PAGEOFF] define <16 x i1> @test4(i1* %ptr, i32 %v) { diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -3004,55 +3004,22 @@ ; CHECK-LABEL: test_signed_v16f32_v16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v4.4s, #127 +; CHECK-NEXT: fcvtzs v3.4s, v3.4s +; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: fcvtzs 
v1.4s, v1.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s ; CHECK-NEXT: mvni v5.4s, #127 -; CHECK-NEXT: fcvtzs v1.4s, v1.4s -; CHECK-NEXT: fcvtzs v2.4s, v2.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v4.4s -; CHECK-NEXT: smin v1.4s, v1.4s, v4.4s +; CHECK-NEXT: smin v3.4s, v3.4s, v4.4s ; CHECK-NEXT: smin v2.4s, v2.4s, v4.4s -; CHECK-NEXT: smax v0.4s, v0.4s, v5.4s -; CHECK-NEXT: smax v1.4s, v1.4s, v5.4s -; CHECK-NEXT: smax v2.4s, v2.4s, v5.4s -; CHECK-NEXT: xtn v6.4h, v0.4s -; CHECK-NEXT: umov w8, v6.h[0] -; CHECK-NEXT: umov w9, v6.h[1] -; CHECK-NEXT: xtn v1.4h, v1.4s -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: umov w8, v6.h[2] -; CHECK-NEXT: mov v0.b[1], w9 -; CHECK-NEXT: mov v0.b[2], w8 -; CHECK-NEXT: umov w8, v6.h[3] -; CHECK-NEXT: mov v0.b[3], w8 -; CHECK-NEXT: umov w8, v1.h[0] -; CHECK-NEXT: mov v0.b[4], w8 -; CHECK-NEXT: umov w8, v1.h[1] -; CHECK-NEXT: mov v0.b[5], w8 -; CHECK-NEXT: umov w8, v1.h[2] -; CHECK-NEXT: mov v0.b[6], w8 -; CHECK-NEXT: umov w8, v1.h[3] -; CHECK-NEXT: xtn v1.4h, v2.4s -; CHECK-NEXT: fcvtzs v2.4s, v3.4s -; CHECK-NEXT: mov v0.b[7], w8 -; CHECK-NEXT: umov w8, v1.h[0] -; CHECK-NEXT: smin v2.4s, v2.4s, v4.4s -; CHECK-NEXT: mov v0.b[8], w8 -; CHECK-NEXT: umov w8, v1.h[1] +; CHECK-NEXT: smin v1.4s, v1.4s, v4.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v4.4s +; CHECK-NEXT: smax v3.4s, v3.4s, v5.4s ; CHECK-NEXT: smax v2.4s, v2.4s, v5.4s -; CHECK-NEXT: mov v0.b[9], w8 -; CHECK-NEXT: umov w8, v1.h[2] -; CHECK-NEXT: mov v0.b[10], w8 -; CHECK-NEXT: umov w8, v1.h[3] -; CHECK-NEXT: xtn v1.4h, v2.4s -; CHECK-NEXT: mov v0.b[11], w8 -; CHECK-NEXT: umov w8, v1.h[0] -; CHECK-NEXT: mov v0.b[12], w8 -; CHECK-NEXT: umov w8, v1.h[1] -; CHECK-NEXT: mov v0.b[13], w8 -; CHECK-NEXT: umov w8, v1.h[2] -; CHECK-NEXT: mov v0.b[14], w8 -; CHECK-NEXT: umov w8, v1.h[3] -; CHECK-NEXT: mov v0.b[15], w8 +; CHECK-NEXT: smax v1.4s, v1.4s, v5.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v5.4s +; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: uzp1 v0.16b, v0.16b, 
v2.16b ; CHECK-NEXT: ret %x = call <16 x i8> @llvm.fptosi.sat.v16f32.v16i8(<16 x float> %f) ret <16 x i8> %x diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -2515,50 +2515,17 @@ ; CHECK-LABEL: test_unsigned_v16f32_v16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v4.2d, #0x0000ff000000ff -; CHECK-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NEXT: fcvtzu v3.4s, v3.4s ; CHECK-NEXT: fcvtzu v2.4s, v2.4s -; CHECK-NEXT: umin v0.4s, v0.4s, v4.4s -; CHECK-NEXT: umin v1.4s, v1.4s, v4.4s -; CHECK-NEXT: umin v2.4s, v2.4s, v4.4s -; CHECK-NEXT: xtn v5.4h, v0.4s -; CHECK-NEXT: xtn v1.4h, v1.4s -; CHECK-NEXT: umov w8, v5.h[0] -; CHECK-NEXT: umov w9, v5.h[1] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: umov w8, v5.h[2] -; CHECK-NEXT: mov v0.b[1], w9 -; CHECK-NEXT: mov v0.b[2], w8 -; CHECK-NEXT: umov w8, v5.h[3] -; CHECK-NEXT: mov v0.b[3], w8 -; CHECK-NEXT: umov w8, v1.h[0] -; CHECK-NEXT: mov v0.b[4], w8 -; CHECK-NEXT: umov w8, v1.h[1] -; CHECK-NEXT: mov v0.b[5], w8 -; CHECK-NEXT: umov w8, v1.h[2] -; CHECK-NEXT: mov v0.b[6], w8 -; CHECK-NEXT: umov w8, v1.h[3] -; CHECK-NEXT: xtn v1.4h, v2.4s -; CHECK-NEXT: fcvtzu v2.4s, v3.4s -; CHECK-NEXT: mov v0.b[7], w8 -; CHECK-NEXT: umov w8, v1.h[0] +; CHECK-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: umin v3.4s, v3.4s, v4.4s ; CHECK-NEXT: umin v2.4s, v2.4s, v4.4s -; CHECK-NEXT: mov v0.b[8], w8 -; CHECK-NEXT: umov w8, v1.h[1] -; CHECK-NEXT: mov v0.b[9], w8 -; CHECK-NEXT: umov w8, v1.h[2] -; CHECK-NEXT: mov v0.b[10], w8 -; CHECK-NEXT: umov w8, v1.h[3] -; CHECK-NEXT: xtn v1.4h, v2.4s -; CHECK-NEXT: mov v0.b[11], w8 -; CHECK-NEXT: umov w8, v1.h[0] -; CHECK-NEXT: mov v0.b[12], w8 -; CHECK-NEXT: umov w8, v1.h[1] -; CHECK-NEXT: mov v0.b[13], w8 -; CHECK-NEXT: umov w8, v1.h[2] -; CHECK-NEXT: mov v0.b[14], w8 -; CHECK-NEXT: umov w8, 
v1.h[3] -; CHECK-NEXT: mov v0.b[15], w8 +; CHECK-NEXT: umin v1.4s, v1.4s, v4.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v4.4s +; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %x = call <16 x i8> @llvm.fptoui.sat.v16f32.v16i8(<16 x float> %f) ret <16 x i8> %x diff --git a/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll b/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll --- a/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll +++ b/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll @@ -84,43 +84,13 @@ define <16 x i8> @extract_4_v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c, <4 x i16> %d) { ; CHECK-LABEL: extract_4_v4i16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: umov w9, v0.h[0] -; CHECK-NEXT: umov w10, v0.h[1] -; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2 -; CHECK-NEXT: umov w8, v2.h[0] +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 -; CHECK-NEXT: fmov s4, w9 -; CHECK-NEXT: umov w9, v0.h[2] -; CHECK-NEXT: mov v4.b[1], w10 -; CHECK-NEXT: umov w10, v0.h[3] -; CHECK-NEXT: mov v4.b[2], w9 -; CHECK-NEXT: umov w9, v1.h[0] -; CHECK-NEXT: mov v4.b[3], w10 -; CHECK-NEXT: umov w10, v1.h[1] -; CHECK-NEXT: mov v4.b[4], w9 -; CHECK-NEXT: umov w9, v1.h[2] -; CHECK-NEXT: mov v4.b[5], w10 -; CHECK-NEXT: umov w10, v1.h[3] -; CHECK-NEXT: mov v4.b[6], w9 -; CHECK-NEXT: umov w9, v2.h[1] -; CHECK-NEXT: mov v4.b[7], w10 -; CHECK-NEXT: mov v4.b[8], w8 -; CHECK-NEXT: umov w8, v2.h[2] -; CHECK-NEXT: mov v4.b[9], w9 -; CHECK-NEXT: umov w9, v2.h[3] -; CHECK-NEXT: mov v4.b[10], w8 -; CHECK-NEXT: umov w8, v3.h[0] -; CHECK-NEXT: mov v4.b[11], w9 -; CHECK-NEXT: umov w9, v3.h[1] -; CHECK-NEXT: mov v4.b[12], w8 -; CHECK-NEXT: umov w8, v3.h[2] -; CHECK-NEXT: mov v4.b[13], w9 -; CHECK-NEXT: umov w9, v3.h[3] -; CHECK-NEXT: mov v4.b[14], w8 -; CHECK-NEXT: 
mov v4.b[15], w9 -; CHECK-NEXT: mov v0.16b, v4.16b +; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: mov v2.d[1], v3.d[0] +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret entry: %a0 = extractelement <4 x i16> %a, i32 0 @@ -177,36 +147,9 @@ define <16 x i8> @extract_4_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) { ; CHECK-LABEL: extract_4_v4i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: mov w9, v0.s[2] -; CHECK-NEXT: mov w10, v0.s[3] -; CHECK-NEXT: mov v0.b[1], w8 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov v0.b[2], w9 -; CHECK-NEXT: mov w9, v1.s[1] -; CHECK-NEXT: mov v0.b[3], w10 -; CHECK-NEXT: mov v0.b[4], w8 -; CHECK-NEXT: mov w8, v1.s[2] -; CHECK-NEXT: mov v0.b[5], w9 -; CHECK-NEXT: mov w9, v1.s[3] -; CHECK-NEXT: mov v0.b[6], w8 -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov v0.b[7], w9 -; CHECK-NEXT: mov w9, v2.s[1] -; CHECK-NEXT: mov v0.b[8], w8 -; CHECK-NEXT: mov w8, v2.s[2] -; CHECK-NEXT: mov v0.b[9], w9 -; CHECK-NEXT: mov w9, v2.s[3] -; CHECK-NEXT: mov v0.b[10], w8 -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: mov v0.b[11], w9 -; CHECK-NEXT: mov w9, v3.s[1] -; CHECK-NEXT: mov v0.b[12], w8 -; CHECK-NEXT: mov w8, v3.s[2] -; CHECK-NEXT: mov v0.b[13], w9 -; CHECK-NEXT: mov w9, v3.s[3] -; CHECK-NEXT: mov v0.b[14], w8 -; CHECK-NEXT: mov v0.b[15], w9 +; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret entry: %a0 = extractelement <4 x i32> %a, i32 0 @@ -263,41 +206,12 @@ define <16 x i8> @extract_4_mixed(<4 x i16> %a, <4 x i32> %b, <4 x i32> %c, <4 x i16> %d) { ; CHECK-LABEL: extract_4_mixed: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: umov w8, v0.h[0] -; CHECK-NEXT: umov w9, v0.h[1] +; CHECK-NEXT: xtn v2.4h, v2.4s ; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: umov w8, 
v0.h[2] -; CHECK-NEXT: mov v4.b[1], w9 -; CHECK-NEXT: umov w9, v0.h[3] -; CHECK-NEXT: mov v4.b[2], w8 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov v4.b[3], w9 -; CHECK-NEXT: mov w9, v1.s[1] -; CHECK-NEXT: mov v4.b[4], w8 -; CHECK-NEXT: mov w8, v1.s[2] -; CHECK-NEXT: mov v4.b[5], w9 -; CHECK-NEXT: mov w9, v1.s[3] -; CHECK-NEXT: mov v4.b[6], w8 -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov v4.b[7], w9 -; CHECK-NEXT: mov w9, v2.s[1] -; CHECK-NEXT: mov v4.b[8], w8 -; CHECK-NEXT: mov w8, v2.s[2] -; CHECK-NEXT: mov v4.b[9], w9 -; CHECK-NEXT: mov w9, v2.s[3] -; CHECK-NEXT: mov v4.b[10], w8 -; CHECK-NEXT: umov w8, v3.h[0] -; CHECK-NEXT: mov v4.b[11], w9 -; CHECK-NEXT: umov w9, v3.h[1] -; CHECK-NEXT: mov v4.b[12], w8 -; CHECK-NEXT: umov w8, v3.h[2] -; CHECK-NEXT: mov v4.b[13], w9 -; CHECK-NEXT: umov w9, v3.h[3] -; CHECK-NEXT: mov v4.b[14], w8 -; CHECK-NEXT: mov v4.b[15], w9 -; CHECK-NEXT: mov v0.16b, v4.16b +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: xtn2 v0.8h, v1.4s +; CHECK-NEXT: mov v2.d[1], v3.d[0] +; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret entry: %a0 = extractelement <4 x i16> %a, i32 0 @@ -440,25 +354,8 @@ define <16 x i8> @extract_4_v4i32_one(<4 x i32> %a) { ; CHECK-LABEL: extract_4_v4i32_one: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: mov w10, v0.s[2] -; CHECK-NEXT: mov w11, v0.s[3] -; CHECK-NEXT: mov v0.b[1], w8 -; CHECK-NEXT: mov v0.b[2], w10 -; CHECK-NEXT: mov v0.b[3], w11 -; CHECK-NEXT: mov v0.b[4], w9 -; CHECK-NEXT: mov v0.b[5], w8 -; CHECK-NEXT: mov v0.b[6], w10 -; CHECK-NEXT: mov v0.b[7], w11 -; CHECK-NEXT: mov v0.b[8], w9 -; CHECK-NEXT: mov v0.b[9], w8 -; CHECK-NEXT: mov v0.b[10], w10 -; CHECK-NEXT: mov v0.b[11], w11 -; CHECK-NEXT: mov v0.b[12], w9 -; CHECK-NEXT: mov v0.b[13], w8 -; CHECK-NEXT: mov v0.b[14], w10 -; CHECK-NEXT: mov v0.b[15], w11 +; CHECK-NEXT: uzp1 v0.8h, v0.8h, v0.8h +; CHECK-NEXT: uzp1 v0.16b, v0.16b, v0.16b ; CHECK-NEXT: ret entry: %a0 = 
extractelement <4 x i32> %a, i32 0 diff --git a/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll b/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/back-off-barrier-subtarget-feature.ll @@ -0,0 +1,97 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-NO-BACKOFF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-BACKOFF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-BACKOFF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=-back-off-barrier -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX9-NO-BACKOFF %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10-BACKOFF %s + +; Subtargets must wait for outstanding memory instructions before a barrier if +; they cannot back off of the barrier. 
+ +define void @back_off_barrier_no_fence(i32* %in, i32* %out) #0 { +; GFX9-NO-BACKOFF-LABEL: back_off_barrier_no_fence: +; GFX9-NO-BACKOFF: ; %bb.0: +; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NO-BACKOFF-NEXT: flat_load_dword v0, v[0:1] +; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NO-BACKOFF-NEXT: s_barrier +; GFX9-NO-BACKOFF-NEXT: flat_store_dword v[2:3], v0 +; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NO-BACKOFF-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-BACKOFF-LABEL: back_off_barrier_no_fence: +; GFX9-BACKOFF: ; %bb.0: +; GFX9-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-BACKOFF-NEXT: flat_load_dword v0, v[0:1] +; GFX9-BACKOFF-NEXT: s_barrier +; GFX9-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-BACKOFF-NEXT: flat_store_dword v[2:3], v0 +; GFX9-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-BACKOFF-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-BACKOFF-LABEL: back_off_barrier_no_fence: +; GFX10-BACKOFF: ; %bb.0: +; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-BACKOFF-NEXT: flat_load_dword v0, v[0:1] +; GFX10-BACKOFF-NEXT: s_barrier +; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-BACKOFF-NEXT: flat_store_dword v[2:3], v0 +; GFX10-BACKOFF-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-BACKOFF-NEXT: s_setpc_b64 s[30:31] + %load = load i32, i32* %in + call void @llvm.amdgcn.s.barrier() + store i32 %load, i32* %out + ret void +} + +define void @back_off_barrier_with_fence(i32* %in, i32* %out) #0 { +; GFX9-NO-BACKOFF-LABEL: back_off_barrier_with_fence: +; GFX9-NO-BACKOFF: ; %bb.0: +; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NO-BACKOFF-NEXT: flat_load_dword v0, v[0:1] +; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NO-BACKOFF-NEXT: s_barrier +; GFX9-NO-BACKOFF-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NO-BACKOFF-NEXT: 
flat_store_dword v[2:3], v0 +; GFX9-NO-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-NO-BACKOFF-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-BACKOFF-LABEL: back_off_barrier_with_fence: +; GFX9-BACKOFF: ; %bb.0: +; GFX9-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-BACKOFF-NEXT: flat_load_dword v0, v[0:1] +; GFX9-BACKOFF-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-BACKOFF-NEXT: s_barrier +; GFX9-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-BACKOFF-NEXT: flat_store_dword v[2:3], v0 +; GFX9-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-BACKOFF-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-BACKOFF-LABEL: back_off_barrier_with_fence: +; GFX10-BACKOFF: ; %bb.0: +; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-BACKOFF-NEXT: flat_load_dword v0, v[0:1] +; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-BACKOFF-NEXT: s_barrier +; GFX10-BACKOFF-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-BACKOFF-NEXT: buffer_gl0_inv +; GFX10-BACKOFF-NEXT: flat_store_dword v[2:3], v0 +; GFX10-BACKOFF-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-BACKOFF-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX10-BACKOFF-NEXT: s_setpc_b64 s[30:31] + %load = load i32, i32* %in + fence syncscope("workgroup") release + call void @llvm.amdgcn.s.barrier() + fence syncscope("workgroup") acquire + store i32 %load, i32* %out + ret void +} + +declare void @llvm.amdgcn.s.barrier() + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll b/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/ipra-return-address-save-restore.ll @@ -0,0 +1,199 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -enable-ipra=1 < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs 
-enable-ipra=0 < %s | FileCheck -check-prefix=GCN %s + +; This test is to make sure the return address registers, if clobbered in the +; function or the function has calls, are save/restored when IPRA is enabled/disabled. + +; TODO: An artificial test with high register pressure would be more reliable in the +; long run as branches on constants could be fragile. + +%struct.ShaderData = type { <3 x float>, <3 x float>, <3 x float>, <3 x float>, i32, i32, i32, i32, i32, float, float, i32, i32, float, float, %struct.differential3, %struct.differential3, %struct.differential, %struct.differential, <3 x float>, <3 x float>, <3 x float>, %struct.differential3, i32, i32, i32, float, <3 x float>, <3 x float>, <3 x float>, [1 x %struct.ShaderClosure] } +%struct.differential = type { float, float } +%struct.differential3 = type { <3 x float>, <3 x float> } +%struct.ShaderClosure = type { <3 x float>, i32, float, <3 x float>, [10 x float], [8 x i8] } +%struct.MicrofacetExtra = type { <3 x float>, <3 x float>, <3 x float>, float, [12 x i8] } + +; Function Attrs: nofree nosync nounwind readnone speculatable willreturn +declare float @llvm.fmuladd.f32(float, float, float) #0 + +; Function Attrs: nofree nosync nounwind readnone speculatable willreturn +declare <3 x float> @llvm.fmuladd.v3f32(<3 x float>, <3 x float>, <3 x float>) #0 + +; Function Attrs: nofree nosync nounwind readnone speculatable willreturn +declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #0 + +; Function Attrs: argmemonly nofree nosync nounwind willreturn +declare void @llvm.lifetime.end.p5i8(i64 immarg, i8 addrspace(5)* nocapture) #1 + +; Function Attrs: norecurse +define internal fastcc void @svm_node_closure_bsdf(%struct.ShaderData addrspace(1)* %sd, float* %stack, <4 x i32> %node, i32* %offset, i32 %0, i8 %trunc, float %1, float %2, float %mul80, i1 %cmp412.old, <4 x i32> %3, float %4, i32 %5, i1 %cmp440, i1 %cmp442, i1 %or.cond1306, float %.op, %struct.ShaderClosure 
addrspace(1)* %arrayidx.i.i2202, %struct.ShaderClosure addrspace(1)* %retval.0.i.i22089, %struct.ShaderClosure addrspace(1)* %retval.1.i221310, i1 %cmp575, i32 addrspace(1)* %num_closure_left.i2215, i32 %6, i1 %cmp.i2216, i32 %7, i64 %idx.ext.i2223, i32 %sub5.i2221) #2 { +; GCN-LABEL: {{^}}svm_node_closure_bsdf: +; GCN-NOT: s30, +; GCN-NOT: s31, +; GCN: s_waitcnt vmcnt(0) +; GCN: s_setpc_b64 s[30:31] +; GCN: .size svm_node_closure_bsdf +entry: + %8 = extractelement <4 x i32> %node, i64 0 + %cmp.i.not = icmp eq i32 undef, 0 + br i1 undef, label %common.ret.critedge, label %cond.true + +cond.true: ; preds = %entry + %9 = load float, float* null, align 4 + %phi.cmp = fcmp oeq float %9, 0.000000e+00 + br i1 %phi.cmp, label %common.ret, label %cond.true20 + +cond.true20: ; preds = %cond.true + %trunc1 = trunc i32 %0 to i8 + switch i8 %trunc, label %common.ret [ + i8 44, label %sw.bb + i8 0, label %if.end.i.i2285 + ] + +sw.bb: ; preds = %cond.true20 + %10 = load float, float* null, align 4 + %11 = load float, float* null, align 4 + %12 = tail call float @llvm.amdgcn.fmed3.f32(float %1, float 0.000000e+00, float 0.000000e+00) + %mul802 = fmul nsz float %1, 0.000000e+00 + %cmp412.old3 = fcmp nsz ogt float %1, 0.000000e+00 + br i1 %cmp412.old, label %if.then413, label %common.ret + +if.then413: ; preds = %sw.bb + %13 = load <4 x i32>, <4 x i32> addrspace(1)* null, align 16 + %14 = extractelement <4 x i32> %node, i64 0 + %cmp4404 = fcmp nsz ole float %1, 0.000000e+00 + %cmp4425 = icmp eq i32 %0, 0 + %or.cond13066 = select i1 %cmp412.old, i1 false, i1 %cmp412.old + br i1 %or.cond1306, label %if.then443, label %if.else568 + +if.then443: ; preds = %if.then413 + br i1 true, label %if.end511, label %common.ret + +common.ret.critedge: ; preds = %entry + store i32 0, i32* null, align 4 + br label %common.ret + +common.ret: ; preds = %if.end.i.i2285, %if.end627.sink.split, %cond.end579, %bsdf_alloc.exit2188, %if.end511, %common.ret.critedge, %if.then443, %sw.bb, %cond.true20, 
%cond.true + ret void + +if.end511: ; preds = %if.then443 + br i1 false, label %common.ret, label %if.then519 + +if.then519: ; preds = %if.end511 + br i1 false, label %bsdf_alloc.exit2188, label %if.then.i2172 + +if.then.i2172: ; preds = %if.then519 + br i1 false, label %closure_alloc.exit.i2184, label %if.end.i.i2181 + +if.end.i.i2181: ; preds = %if.then.i2172 + br label %closure_alloc.exit.i2184 + +closure_alloc.exit.i2184: ; preds = %if.end.i.i2181, %if.then.i2172 + br i1 false, label %bsdf_alloc.exit2188, label %if.end.i2186 + +if.end.i2186: ; preds = %closure_alloc.exit.i2184 + br label %bsdf_alloc.exit2188 + +bsdf_alloc.exit2188: ; preds = %if.end.i2186, %closure_alloc.exit.i2184, %if.then519 + br i1 false, label %common.ret, label %if.then534 + +if.then534: ; preds = %bsdf_alloc.exit2188 + %.op7 = fmul nsz float undef, 0.000000e+00 + %mul558 = select i1 %cmp440, float 0.000000e+00, float %1 + %15 = tail call float @llvm.amdgcn.fmed3.f32(float 0.000000e+00, float 0.000000e+00, float 0.000000e+00) + store float %mul558, float addrspace(1)* null, align 4 + br label %if.end627.sink.split + +if.else568: ; preds = %if.then413 + br i1 undef, label %bsdf_alloc.exit2214, label %if.then.i2198 + +if.then.i2198: ; preds = %if.else568 + br i1 undef, label %closure_alloc.exit.i2210, label %if.end.i.i2207 + +if.end.i.i2207: ; preds = %if.then.i2198 + %arrayidx.i.i22028 = getelementptr inbounds %struct.ShaderData, %struct.ShaderData addrspace(1)* %sd, i64 0, i32 30, i64 undef + br label %closure_alloc.exit.i2210 + +closure_alloc.exit.i2210: ; preds = %if.end.i.i2207, %if.then.i2198 + %retval.0.i.i220899 = phi %struct.ShaderClosure addrspace(1)* [ %arrayidx.i.i2202, %if.end.i.i2207 ], [ null, %if.then.i2198 ] + br i1 false, label %bsdf_alloc.exit2214, label %if.end.i2212 + +if.end.i2212: ; preds = %closure_alloc.exit.i2210 + br label %bsdf_alloc.exit2214 + +bsdf_alloc.exit2214: ; preds = %if.end.i2212, %closure_alloc.exit.i2210, %if.else568 + %retval.1.i22131010 = phi 
%struct.ShaderClosure addrspace(1)* [ %arrayidx.i.i2202, %if.end.i2212 ], [ null, %closure_alloc.exit.i2210 ], [ null, %if.else568 ] + %cmp57511 = icmp ne %struct.ShaderClosure addrspace(1)* %arrayidx.i.i2202, null + br i1 %cmp442, label %cond.true576, label %cond.end579 + +cond.true576: ; preds = %bsdf_alloc.exit2214 + %num_closure_left.i221512 = getelementptr inbounds %struct.ShaderData, %struct.ShaderData addrspace(1)* %sd, i64 0, i32 25 + %16 = load i32, i32 addrspace(1)* %num_closure_left.i2215, align 8 + %cmp.i221613 = icmp slt i32 %0, 0 + br i1 %cmp440, label %cond.end579, label %if.end.i2227 + +if.end.i2227: ; preds = %cond.true576 + %sub5.i222114 = add nuw nsw i32 %0, 0 + %17 = load i32, i32 addrspace(1)* null, align 4294967296 + %idx.ext.i222315 = sext i32 %0 to i64 + %add.ptr.i2224 = getelementptr inbounds %struct.ShaderData, %struct.ShaderData addrspace(1)* %sd, i64 0, i32 30, i64 %idx.ext.i2223 + %idx.ext8.i22252724 = zext i32 %0 to i64 + %add.ptr9.i2226 = getelementptr inbounds %struct.ShaderClosure, %struct.ShaderClosure addrspace(1)* %add.ptr.i2224, i64 %idx.ext8.i22252724 + %phi.cast2731 = bitcast %struct.ShaderClosure addrspace(1)* %add.ptr9.i2226 to %struct.MicrofacetExtra addrspace(1)* + br label %cond.end579 + +cond.end579: ; preds = %if.end.i2227, %cond.true576, %bsdf_alloc.exit2214 + %cond580 = phi %struct.MicrofacetExtra addrspace(1)* [ null, %bsdf_alloc.exit2214 ], [ %phi.cast2731, %if.end.i2227 ], [ null, %cond.true576 ] + %tobool583 = icmp ne %struct.MicrofacetExtra addrspace(1)* %cond580, null + %or.cond1308 = select i1 %cmp442, i1 %tobool583, i1 false + br i1 %or.cond1308, label %if.then584, label %common.ret + +if.then584: ; preds = %cond.end579 + store %struct.MicrofacetExtra addrspace(1)* null, %struct.MicrofacetExtra addrspace(1)* addrspace(1)* null, align 4294967296 + br label %if.end627.sink.split + +if.end627.sink.split: ; preds = %if.then584, %if.then534 + store i32 0, i32 addrspace(1)* null, align 4 + br label %common.ret + 
+if.end.i.i2285: ; preds = %cond.true20 + store i32 0, i32 addrspace(1)* null, align 4294967296 + br label %common.ret +} + +define internal fastcc void @svm_eval_nodes(%struct.ShaderData addrspace(1)* %sd) { +sw.bb10: +; GCN-LABEL: {{^}}svm_eval_nodes: +; GCN-DAG: v_writelane_b32 [[CSR_VGPR:v[0-9]+]], s30, +; GCN-DAG: v_writelane_b32 [[CSR_VGPR]], s31, +; GCN: s_swappc_b64 s[30:31] +; GCN-DAG: v_readlane_b32 s4, [[CSR_VGPR]], +; GCN-DAG: v_readlane_b32 s5, [[CSR_VGPR]], +; GCN: s_waitcnt vmcnt(0) +; GCN: s_setpc_b64 s[4:5] + call fastcc void @svm_node_closure_bsdf(%struct.ShaderData addrspace(1)* null, float* null, <4 x i32> zeroinitializer, i32* null, i32 undef, i8 undef, float undef, float undef, float undef, i1 undef, <4 x i32> undef, float undef, i32 undef, i1 undef, i1 undef, i1 undef, float undef, %struct.ShaderClosure addrspace(1)* undef, %struct.ShaderClosure addrspace(1)* undef, %struct.ShaderClosure addrspace(1)* undef, i1 undef, i32 addrspace(1)* undef, i32 undef, i1 undef, i32 undef, i64 undef, i32 undef) + ret void +} + +define amdgpu_kernel void @kernel_ocl_path_trace_shadow_blocked_dl() { +kernel_set_buffer_pointers.exit: +; GCN-LABEL: {{^}}kernel_ocl_path_trace_shadow_blocked_dl: +; GCN: s_swappc_b64 s[30:31] +; GCN: endpgm + tail call fastcc void @svm_eval_nodes(%struct.ShaderData addrspace(1)* null) + ret void +} + +; Function Attrs: nofree nosync nounwind readnone speculatable willreturn +declare float @llvm.fabs.f32(float) #0 + +; Function Attrs: nofree nosync nounwind readnone speculatable willreturn +declare float @llvm.maxnum.f32(float, float) #0 + +; Function Attrs: nounwind readnone speculatable willreturn +declare float @llvm.amdgcn.fmed3.f32(float, float, float) #3 + +attributes #0 = { nofree nosync nounwind readnone speculatable willreturn } +attributes #1 = { argmemonly nofree nosync nounwind willreturn } +attributes #2 = { norecurse } +attributes #3 = { nounwind readnone speculatable willreturn } diff --git 
a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir --- a/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-preexisting-vscnt.mir @@ -35,7 +35,7 @@ ; GFX10: S_WAITCNT 0 ; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX10: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec - ; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 0 + ; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 1 ; GFX10: S_BARRIER ; GFX10: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr ; GFX10: S_WAITCNT 112 @@ -112,7 +112,7 @@ ; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 0 ; GFX10: GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec ; GFX10: S_WAITCNT 0 - ; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 0 + ; GFX10: S_WAITCNT_VSCNT undef $sgpr_null, 1 ; GFX10: S_BARRIER ; GFX10: $vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr ; GFX10: S_WAITCNT 112 diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll b/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll --- a/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll +++ b/llvm/test/CodeGen/AMDGPU/waitcnt-vscnt.ll @@ -1,6 +1,6 @@ ; RUN: llc -march=amdgcn -mcpu=gfx802 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8_9 %s ; RUN: llc -march=amdgcn -mcpu=gfx900 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9_10,GFX8_9 %s -; RUN: llc -march=amdgcn -mcpu=gfx1010 -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX9_10 %s +; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-back-off-barrier -asm-verbose=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX9_10 %s ; GCN-LABEL: barrier_vmcnt_global: ; GFX8: flat_load_dword @@ -42,7 +42,7 @@ %tmp5 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %tmp4 store i32 0, i32 addrspace(1)* %tmp5, align 4 fence syncscope("singlethread") release - tail 
call void @llvm.amdgcn.s.barrier() #3 + tail call void @llvm.amdgcn.s.barrier() fence syncscope("singlethread") acquire %tmp6 = add nuw nsw i64 %tmp2, 4294967296 %tmp7 = lshr exact i64 %tmp6, 32 @@ -116,7 +116,7 @@ %tmp5 = getelementptr inbounds i32, i32* %arg, i64 %tmp4 store i32 0, i32* %tmp5, align 4 fence syncscope("singlethread") release - tail call void @llvm.amdgcn.s.barrier() #3 + tail call void @llvm.amdgcn.s.barrier() fence syncscope("singlethread") acquire %tmp6 = add nuw nsw i64 %tmp2, 4294967296 %tmp7 = lshr exact i64 %tmp6, 32 diff --git a/llvm/test/CodeGen/LoongArch/1ri.mir b/llvm/test/CodeGen/LoongArch/1ri.mir --- a/llvm/test/CodeGen/LoongArch/1ri.mir +++ b/llvm/test/CodeGen/LoongArch/1ri.mir @@ -80,17 +80,17 @@ --- # CHECK-LABEL: test_BEQZ: # CHECK-ENC: 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 0 0 1 0 0 0 0 0 0 0 -# CHECK-ASM: beqz $a0, 23 +# CHECK-ASM: beqz $a0, 92 name: test_BEQZ body: | bb.0: - BEQZ $r4, 23 + BEQZ $r4, 92 ... --- # CHECK-LABEL: test_BNEZ: # CHECK-ENC: 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 1 0 0 0 0 0 0 0 -# CHECK-ASM: bnez $a0, 21 +# CHECK-ASM: bnez $a0, 84 name: test_BNEZ body: | bb.0: - BNEZ $r4, 21 + BNEZ $r4, 84 diff --git a/llvm/test/CodeGen/LoongArch/2ri.mir b/llvm/test/CodeGen/LoongArch/2ri.mir --- a/llvm/test/CodeGen/LoongArch/2ri.mir +++ b/llvm/test/CodeGen/LoongArch/2ri.mir @@ -280,74 +280,74 @@ --- # CHECK-LABEL: test_LDPTR_W: # CHECK-ENC: 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldptr.w $a0, $a1, 66 +# CHECK-ASM: ldptr.w $a0, $a1, 264 name: test_LDPTR_W body: | bb.0: - $r4 = LDPTR_W $r5, 66 + $r4 = LDPTR_W $r5, 264 ... --- # CHECK-LABEL: test_LDPTR_D: # CHECK-ENC: 0 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ldptr.d $a0, $a1, 56 +# CHECK-ASM: ldptr.d $a0, $a1, 224 name: test_LDPTR_D body: | bb.0: - $r4 = LDPTR_D $r5, 56 + $r4 = LDPTR_D $r5, 224 ... 
--- # CHECK-LABEL: test_STPTR_W: # CHECK-ENC: 0 0 1 0 0 1 0 1 0 0 0 0 0 0 0 1 0 1 0 1 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stptr.w $a0, $a1, 87 +# CHECK-ASM: stptr.w $a0, $a1, 348 name: test_STPTR_W body: | bb.0: - STPTR_W $r4, $r5, 87 + STPTR_W $r4, $r5, 348 ... --- # CHECK-LABEL: test_STPTR_D: # CHECK-ENC: 0 0 1 0 0 1 1 1 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: stptr.d $a0, $a1, 145 +# CHECK-ASM: stptr.d $a0, $a1, 580 name: test_STPTR_D body: | bb.0: - STPTR_D $r4, $r5, 145 + STPTR_D $r4, $r5, 580 ... --- # CHECK-LABEL: test_LL_W: # CHECK-ENC: 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0 1 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ll.w $a0, $a1, 243 +# CHECK-ASM: ll.w $a0, $a1, 972 name: test_LL_W body: | bb.0: - $r4 = LL_W $r5, 243 + $r4 = LL_W $r5, 972 ... --- # CHECK-LABEL: test_LL_D: # CHECK-ENC: 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 1 0 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: ll.d $a0, $a1, 74 +# CHECK-ASM: ll.d $a0, $a1, 296 name: test_LL_D body: | bb.0: - $r4 = LL_D $r5, 74 + $r4 = LL_D $r5, 296 ... --- # CHECK-LABEL: test_SC_W: # CHECK-ENC: 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: sc.w $a0, $a1, 96 +# CHECK-ASM: sc.w $a0, $a1, 384 name: test_SC_W body: | bb.0: - $r4 = SC_W $r4, $r5, 96 + $r4 = SC_W $r4, $r5, 384 ... --- # CHECK-LABEL: test_SC_D: # CHECK-ENC: 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0 1 1 0 1 0 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: sc.d $a0, $a1, 105 +# CHECK-ASM: sc.d $a0, $a1, 420 name: test_SC_D body: | bb.0: - $r4 = SC_D $r4, $r5, 105 + $r4 = SC_D $r4, $r5, 420 ... # ------------------------------------------------------------------------------------------------- @@ -371,62 +371,62 @@ --- # CHECK-LABEL: test_JIRL: # CHECK-ENC: 0 1 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: jirl $a0, $a1, 49 +# CHECK-ASM: jirl $a0, $a1, 196 name: test_JIRL body: | bb.0: - $r4 = JIRL $r5, 49 + $r4 = JIRL $r5, 196 ... 
--- # CHECK-LABEL: test_BEQ: # CHECK-ENC: 0 1 0 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 1 -# CHECK-ASM: beq $a0, $a1, 196 +# CHECK-ASM: beq $a0, $a1, 784 name: test_BEQ body: | bb.0: - BEQ $r4, $r5, 196 + BEQ $r4, $r5, 784 ... --- # CHECK-LABEL: test_BNE: # CHECK-ENC: 0 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 0 1 0 0 0 0 1 0 1 -# CHECK-ASM: bne $a0, $a1, 19 +# CHECK-ASM: bne $a0, $a1, 76 name: test_BNE body: | bb.0: - BNE $r4, $r5, 19 + BNE $r4, $r5, 76 ... --- # CHECK-LABEL: test_BLT: # CHECK-ENC: 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 0 1 1 0 0 1 0 0 0 0 1 0 1 -# CHECK-ASM: blt $a0, $a1, 123 +# CHECK-ASM: blt $a0, $a1, 492 name: test_BLT body: | bb.0: - BLT $r4, $r5, 123 + BLT $r4, $r5, 492 ... --- # CHECK-LABEL: test_BGE: # CHECK-ENC: 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 0 1 0 1 -# CHECK-ASM: bge $a0, $a1, 12 +# CHECK-ASM: bge $a0, $a1, 48 name: test_BGE body: | bb.0: - BGE $r4, $r5, 12 + BGE $r4, $r5, 48 ... --- # CHECK-LABEL: test_BLTU: # CHECK-ENC: 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 0 0 0 0 1 0 1 -# CHECK-ASM: bltu $a0, $a1, 17 +# CHECK-ASM: bltu $a0, $a1, 68 name: test_BLTU body: | bb.0: - BLTU $r4, $r5, 17 + BLTU $r4, $r5, 68 ... --- # CHECK-LABEL: test_BGEU: # CHECK-ENC: 0 1 1 0 1 1 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0 0 1 0 0 0 0 1 0 1 -# CHECK-ASM: bgeu $a0, $a1, 88 +# CHECK-ASM: bgeu $a0, $a1, 352 name: test_BGEU body: | bb.0: - BGEU $r4, $r5, 88 + BGEU $r4, $r5, 352 diff --git a/llvm/test/CodeGen/LoongArch/3ri.mir b/llvm/test/CodeGen/LoongArch/3ri.mir --- a/llvm/test/CodeGen/LoongArch/3ri.mir +++ b/llvm/test/CodeGen/LoongArch/3ri.mir @@ -16,29 +16,29 @@ --- # CHECK-LABEL: test_ALSL_W: # CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: alsl.w $a0, $a1, $a2, 3 +# CHECK-ASM: alsl.w $a0, $a1, $a2, 4 name: test_ALSL_W body: | bb.0: - $r4 = ALSL_W $r5, $r6, 3 + $r4 = ALSL_W $r5, $r6, 4 ... 
--- # CHECK-LABEL: test_ALSL_WU: # CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: alsl.wu $a0, $a1, $a2, 1 +# CHECK-ASM: alsl.wu $a0, $a1, $a2, 2 name: test_ALSL_WU body: | bb.0: - $r4 = ALSL_WU $r5, $r6, 1 + $r4 = ALSL_WU $r5, $r6, 2 ... --- # CHECK-LABEL: test_ALSL_D: # CHECK-ENC: 0 0 0 0 0 0 0 0 0 0 1 0 1 1 0 1 1 0 0 1 1 0 0 0 1 0 1 0 0 1 0 0 -# CHECK-ASM: alsl.d $a0, $a1, $a2, 3 +# CHECK-ASM: alsl.d $a0, $a1, $a2, 4 name: test_ALSL_D body: | bb.0: - $r4 = ALSL_D $r5, $r6, 3 + $r4 = ALSL_D $r5, $r6, 4 ... --- # CHECK-LABEL: test_BYTEPICK_W: diff --git a/llvm/test/CodeGen/LoongArch/misc.mir b/llvm/test/CodeGen/LoongArch/misc.mir --- a/llvm/test/CodeGen/LoongArch/misc.mir +++ b/llvm/test/CodeGen/LoongArch/misc.mir @@ -62,20 +62,20 @@ --- # CHECK-LABEL: test_B: # CHECK-ENC: 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 -# CHECK-ASM: b 20 +# CHECK-ASM: b 80 name: test_B body: | bb.0: - B 20 + B 80 ... --- # CHECK-LABEL: test_BL: # CHECK-ENC: 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 -# CHECK-ASM: bl 34 +# CHECK-ASM: bl 136 name: test_BL body: | bb.0: - BL 34 + BL 136 ... 
# -------------------------------------------------------------------------------------------------------- diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.ll --- a/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.ll +++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-xlcompat-math.ll @@ -98,49 +98,104 @@ declare float @llvm.ppc.fnmadds(float, float, float) -define dso_local double @fnmsub_t0(double %d, double %d2, double %d3) { -; CHECK-PWR8-LABEL: fnmsub_t0: +define dso_local float @fnmsub_f32(float %f, float %f2, float %f3) { +; CHECK-PWR8-LABEL: fnmsub_f32: ; CHECK-PWR8: # %bb.0: # %entry -; CHECK-PWR8-NEXT: xsnmsubmdp 1, 2, 3 +; CHECK-PWR8-NEXT: xsnmsubasp 3, 1, 2 +; CHECK-PWR8-NEXT: fmr 1, 3 ; CHECK-PWR8-NEXT: blr ; -; CHECK-NOVSX-LABEL: fnmsub_t0: +; CHECK-NOVSX-LABEL: fnmsub_f32: +; CHECK-NOVSX: # %bb.0: # %entry +; CHECK-NOVSX-NEXT: fnmsubs 1, 1, 2, 3 +; CHECK-NOVSX-NEXT: blr +; +; CHECK-PWR7-LABEL: fnmsub_f32: +; CHECK-PWR7: # %bb.0: # %entry +; CHECK-PWR7-NEXT: fnmsubs 1, 1, 2, 3 +; CHECK-PWR7-NEXT: blr +entry: + %0 = tail call float @llvm.ppc.fnmsub.f32(float %f, float %f2, float %f3) + ret float %0 +} + +declare float @llvm.ppc.fnmsub.f32(float, float, float) + +define dso_local double @fnmsub_f64(double %f, double %f2, double %f3) { +; CHECK-PWR8-LABEL: fnmsub_f64: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: xsnmsubadp 3, 1, 2 +; CHECK-PWR8-NEXT: fmr 1, 3 +; CHECK-PWR8-NEXT: blr +; +; CHECK-NOVSX-LABEL: fnmsub_f64: ; CHECK-NOVSX: # %bb.0: # %entry ; CHECK-NOVSX-NEXT: fnmsub 1, 1, 2, 3 ; CHECK-NOVSX-NEXT: blr ; -; CHECK-PWR7-LABEL: fnmsub_t0: +; CHECK-PWR7-LABEL: fnmsub_f64: ; CHECK-PWR7: # %bb.0: # %entry -; CHECK-PWR7-NEXT: xsnmsubmdp 1, 2, 3 +; CHECK-PWR7-NEXT: xsnmsubadp 3, 1, 2 +; CHECK-PWR7-NEXT: fmr 1, 3 ; CHECK-PWR7-NEXT: blr entry: - %0 = tail call double @llvm.ppc.fnmsub(double %d, double %d2, double %d3) + %0 = tail call double @llvm.ppc.fnmsub.f64(double %f, 
double %f2, double %f3) ret double %0 } -declare double @llvm.ppc.fnmsub(double, double, double) +declare double @llvm.ppc.fnmsub.f64(double, double, double) -define dso_local float @fnmsubs_t0(float %f, float %f2, float %f3) { -; CHECK-PWR8-LABEL: fnmsubs_t0: +define dso_local <4 x float> @fnmsub_v4f32(<4 x float> %f, <4 x float> %f2, <4 x float> %f3) { +; CHECK-PWR8-LABEL: fnmsub_v4f32: ; CHECK-PWR8: # %bb.0: # %entry -; CHECK-PWR8-NEXT: xsnmsubmsp 1, 2, 3 +; CHECK-PWR8-NEXT: xvnmsubasp 36, 34, 35 +; CHECK-PWR8-NEXT: vmr 2, 4 ; CHECK-PWR8-NEXT: blr ; -; CHECK-NOVSX-LABEL: fnmsubs_t0: +; CHECK-NOVSX-LABEL: fnmsub_v4f32: ; CHECK-NOVSX: # %bb.0: # %entry -; CHECK-NOVSX-NEXT: fnmsubs 1, 1, 2, 3 +; CHECK-NOVSX-NEXT: fnmsubs 1, 1, 5, 9 +; CHECK-NOVSX-NEXT: fnmsubs 2, 2, 6, 10 +; CHECK-NOVSX-NEXT: fnmsubs 3, 3, 7, 11 +; CHECK-NOVSX-NEXT: fnmsubs 4, 4, 8, 12 ; CHECK-NOVSX-NEXT: blr ; -; CHECK-PWR7-LABEL: fnmsubs_t0: +; CHECK-PWR7-LABEL: fnmsub_v4f32: ; CHECK-PWR7: # %bb.0: # %entry -; CHECK-PWR7-NEXT: fnmsubs 1, 1, 2, 3 +; CHECK-PWR7-NEXT: xvnmsubasp 36, 34, 35 +; CHECK-PWR7-NEXT: vmr 2, 4 ; CHECK-PWR7-NEXT: blr entry: - %0 = tail call float @llvm.ppc.fnmsubs(float %f, float %f2, float %f3) - ret float %0 + %0 = tail call <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float> %f, <4 x float> %f2, <4 x float> %f3) + ret <4 x float> %0 +} + +declare <4 x float> @llvm.ppc.fnmsub.v4f32(<4 x float>, <4 x float>, <4 x float>) + +define dso_local <2 x double> @fnmsub_v2f64(<2 x double> %f, <2 x double> %f2, <2 x double> %f3) { +; CHECK-PWR8-LABEL: fnmsub_v2f64: +; CHECK-PWR8: # %bb.0: # %entry +; CHECK-PWR8-NEXT: xvnmsubadp 36, 34, 35 +; CHECK-PWR8-NEXT: vmr 2, 4 +; CHECK-PWR8-NEXT: blr +; +; CHECK-NOVSX-LABEL: fnmsub_v2f64: +; CHECK-NOVSX: # %bb.0: # %entry +; CHECK-NOVSX-NEXT: fnmsub 1, 1, 3, 5 +; CHECK-NOVSX-NEXT: fnmsub 2, 2, 4, 6 +; CHECK-NOVSX-NEXT: blr +; +; CHECK-PWR7-LABEL: fnmsub_v2f64: +; CHECK-PWR7: # %bb.0: # %entry +; CHECK-PWR7-NEXT: xvnmsubadp 36, 34, 35 +; 
CHECK-PWR7-NEXT: vmr 2, 4 +; CHECK-PWR7-NEXT: blr +entry: + %0 = tail call <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double> %f, <2 x double> %f2, <2 x double> %f3) + ret <2 x double> %0 } -declare float @llvm.ppc.fnmsubs(float, float, float) +declare <2 x double> @llvm.ppc.fnmsub.v2f64(<2 x double>, <2 x double>, <2 x double>) define dso_local double @fre(double %d) { ; CHECK-PWR8-LABEL: fre: diff --git a/llvm/test/CodeGen/PowerPC/vec-itofp.ll b/llvm/test/CodeGen/PowerPC/vec-itofp.ll --- a/llvm/test/CodeGen/PowerPC/vec-itofp.ll +++ b/llvm/test/CodeGen/PowerPC/vec-itofp.ll @@ -307,34 +307,33 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv v2, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI3_0@toc@ha -; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r4, r4, .LCPI3_0@toc@l ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI3_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI3_1@toc@l -; CHECK-BE-NEXT: vperm v3, v4, v2, v3 +; CHECK-BE-NEXT: vperm v3, v2, v2, v3 ; CHECK-BE-NEXT: vextsh2d v3, v3 ; CHECK-BE-NEXT: xvcvsxddp vs0, v3 ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI3_2@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI3_2@toc@l -; CHECK-BE-NEXT: vperm v3, v4, v2, v3 -; CHECK-BE-NEXT: stxv vs0, 16(r3) +; CHECK-BE-NEXT: vperm v3, v2, v2, v3 +; CHECK-BE-NEXT: stxv vs0, 0(r3) ; CHECK-BE-NEXT: vextsh2d v3, v3 ; CHECK-BE-NEXT: xvcvsxddp vs1, v3 ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI3_3@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI3_3@toc@l ; CHECK-BE-NEXT: vperm v3, v2, v2, v3 -; CHECK-BE-NEXT: stxv vs1, 48(r3) +; CHECK-BE-NEXT: stxv vs1, 16(r3) ; CHECK-BE-NEXT: vextsh2d v3, v3 ; CHECK-BE-NEXT: xvcvsxddp vs2, v3 ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: vperm v2, v2, v2, v3 -; CHECK-BE-NEXT: stxv vs2, 0(r3) +; CHECK-BE-NEXT: stxv vs2, 32(r3) ; CHECK-BE-NEXT: vextsh2d v2, v2 ; CHECK-BE-NEXT: xvcvsxddp vs3, v2 -; CHECK-BE-NEXT: stxv vs3, 32(r3) +; CHECK-BE-NEXT: stxv vs3, 48(r3) ; CHECK-BE-NEXT: blr entry: 
%0 = load <8 x i16>, <8 x i16>* %SrcPtr, align 16 @@ -395,20 +394,19 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv v2, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI4_0@toc@ha -; CHECK-BE-NEXT: xxlxor v3, v3, v3 ; CHECK-BE-NEXT: addi r4, r4, .LCPI4_0@toc@l -; CHECK-BE-NEXT: lxv v4, 0(r4) +; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI4_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI4_1@toc@l -; CHECK-BE-NEXT: vperm v3, v3, v2, v4 +; CHECK-BE-NEXT: vperm v3, v2, v2, v3 ; CHECK-BE-NEXT: vextsh2d v3, v3 ; CHECK-BE-NEXT: xvcvsxddp vs0, v3 ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: vperm v2, v2, v2, v3 -; CHECK-BE-NEXT: stxv vs0, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) ; CHECK-BE-NEXT: vextsh2d v2, v2 ; CHECK-BE-NEXT: xvcvsxddp vs1, v2 -; CHECK-BE-NEXT: stxv vs1, 0(r3) +; CHECK-BE-NEXT: stxv vs1, 16(r3) ; CHECK-BE-NEXT: blr entry: %0 = load <4 x i16>, <4 x i16>* %SrcPtr, align 16 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll @@ -459,20 +459,19 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtvsrd v2, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI5_0@toc@ha -; CHECK-BE-NEXT: xxlxor v3, v3, v3 ; CHECK-BE-NEXT: addi r4, r4, .LCPI5_0@toc@l -; CHECK-BE-NEXT: lxv v4, 0(r4) +; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI5_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI5_1@toc@l -; CHECK-BE-NEXT: vperm v3, v3, v2, v4 +; CHECK-BE-NEXT: vperm v3, v2, v2, v3 ; CHECK-BE-NEXT: vextsh2d v3, v3 ; CHECK-BE-NEXT: xvcvsxddp vs0, v3 ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: vperm v2, v2, v2, v3 -; CHECK-BE-NEXT: stxv vs0, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) ; CHECK-BE-NEXT: vextsh2d v2, v2 ; CHECK-BE-NEXT: xvcvsxddp vs1, v2 -; CHECK-BE-NEXT: stxv vs1, 0(r3) +; CHECK-BE-NEXT: stxv vs1, 16(r3) ; CHECK-BE-NEXT: blr entry: %0 = bitcast i64 
%a.coerce to <4 x i16> @@ -564,34 +563,33 @@ ; CHECK-BE-LABEL: test8elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis r4, r2, .LCPI6_0@toc@ha -; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r4, r4, .LCPI6_0@toc@l ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI6_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI6_1@toc@l -; CHECK-BE-NEXT: vperm v3, v4, v2, v3 +; CHECK-BE-NEXT: vperm v3, v2, v2, v3 ; CHECK-BE-NEXT: vextsh2d v3, v3 ; CHECK-BE-NEXT: xvcvsxddp vs0, v3 ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI6_2@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI6_2@toc@l -; CHECK-BE-NEXT: vperm v3, v4, v2, v3 -; CHECK-BE-NEXT: stxv vs0, 16(r3) +; CHECK-BE-NEXT: vperm v3, v2, v2, v3 +; CHECK-BE-NEXT: stxv vs0, 0(r3) ; CHECK-BE-NEXT: vextsh2d v3, v3 ; CHECK-BE-NEXT: xvcvsxddp vs1, v3 ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI6_3@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI6_3@toc@l ; CHECK-BE-NEXT: vperm v3, v2, v2, v3 -; CHECK-BE-NEXT: stxv vs1, 48(r3) +; CHECK-BE-NEXT: stxv vs1, 16(r3) ; CHECK-BE-NEXT: vextsh2d v3, v3 ; CHECK-BE-NEXT: xvcvsxddp vs2, v3 ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: vperm v2, v2, v2, v3 -; CHECK-BE-NEXT: stxv vs2, 0(r3) +; CHECK-BE-NEXT: stxv vs2, 32(r3) ; CHECK-BE-NEXT: vextsh2d v2, v2 ; CHECK-BE-NEXT: xvcvsxddp vs3, v2 -; CHECK-BE-NEXT: stxv vs3, 32(r3) +; CHECK-BE-NEXT: stxv vs3, 48(r3) ; CHECK-BE-NEXT: blr entry: %0 = sitofp <8 x i16> %a to <8 x double> @@ -730,52 +728,51 @@ ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis r5, r2, .LCPI7_0@toc@ha -; CHECK-BE-NEXT: lxv v4, 0(r4) -; CHECK-BE-NEXT: lxv v1, 16(r4) -; CHECK-BE-NEXT: xxlxor v5, v5, v5 -; CHECK-BE-NEXT: addis r4, r2, .LCPI7_2@toc@ha +; CHECK-BE-NEXT: lxv v2, 0(r4) ; CHECK-BE-NEXT: addi r5, r5, .LCPI7_0@toc@l -; CHECK-BE-NEXT: addi r4, r4, .LCPI7_2@toc@l -; CHECK-BE-NEXT: lxv v2, 0(r5) +; CHECK-BE-NEXT: lxv v3, 0(r5) ; CHECK-BE-NEXT: addis r5, r2, 
.LCPI7_1@toc@ha ; CHECK-BE-NEXT: addi r5, r5, .LCPI7_1@toc@l -; CHECK-BE-NEXT: lxv v3, 0(r5) -; CHECK-BE-NEXT: vperm v0, v5, v4, v2 -; CHECK-BE-NEXT: vperm v2, v5, v1, v2 -; CHECK-BE-NEXT: vextsh2d v2, v2 -; CHECK-BE-NEXT: vextsh2d v0, v0 -; CHECK-BE-NEXT: xvcvsxddp vs2, v2 -; CHECK-BE-NEXT: vperm v2, v5, v1, v3 -; CHECK-BE-NEXT: xvcvsxddp vs0, v0 -; CHECK-BE-NEXT: vperm v0, v5, v4, v3 +; CHECK-BE-NEXT: lxv v5, 0(r5) +; CHECK-BE-NEXT: addis r5, r2, .LCPI7_2@toc@ha +; CHECK-BE-NEXT: vperm v4, v2, v2, v3 +; CHECK-BE-NEXT: addi r5, r5, .LCPI7_2@toc@l +; CHECK-BE-NEXT: vextsh2d v4, v4 +; CHECK-BE-NEXT: lxv v0, 0(r5) +; CHECK-BE-NEXT: addis r5, r2, .LCPI7_3@toc@ha +; CHECK-BE-NEXT: xvcvsxddp vs0, v4 +; CHECK-BE-NEXT: vperm v4, v2, v2, v5 +; CHECK-BE-NEXT: addi r5, r5, .LCPI7_3@toc@l +; CHECK-BE-NEXT: lxv v1, 0(r5) +; CHECK-BE-NEXT: vextsh2d v4, v4 +; CHECK-BE-NEXT: xvcvsxddp vs1, v4 +; CHECK-BE-NEXT: vperm v4, v2, v2, v0 +; CHECK-BE-NEXT: vperm v2, v2, v2, v1 +; CHECK-BE-NEXT: stxv vs0, 0(r3) +; CHECK-BE-NEXT: vextsh2d v4, v4 +; CHECK-BE-NEXT: xvcvsxddp vs2, v4 +; CHECK-BE-NEXT: lxv v4, 16(r4) +; CHECK-BE-NEXT: stxv vs1, 16(r3) ; CHECK-BE-NEXT: vextsh2d v2, v2 -; CHECK-BE-NEXT: vextsh2d v0, v0 ; CHECK-BE-NEXT: xvcvsxddp vs3, v2 -; CHECK-BE-NEXT: lxv v2, 0(r4) -; CHECK-BE-NEXT: addis r4, r2, .LCPI7_3@toc@ha -; CHECK-BE-NEXT: xvcvsxddp vs1, v0 -; CHECK-BE-NEXT: addi r4, r4, .LCPI7_3@toc@l -; CHECK-BE-NEXT: stxv vs2, 80(r3) -; CHECK-BE-NEXT: stxv vs0, 16(r3) -; CHECK-BE-NEXT: vperm v3, v4, v4, v2 -; CHECK-BE-NEXT: vperm v2, v1, v1, v2 -; CHECK-BE-NEXT: stxv vs3, 112(r3) -; CHECK-BE-NEXT: stxv vs1, 48(r3) -; CHECK-BE-NEXT: vextsh2d v3, v3 +; CHECK-BE-NEXT: vperm v2, v4, v4, v3 +; CHECK-BE-NEXT: stxv vs2, 32(r3) +; CHECK-BE-NEXT: vextsh2d v2, v2 +; CHECK-BE-NEXT: stxv vs3, 48(r3) +; CHECK-BE-NEXT: xvcvsxddp vs4, v2 +; CHECK-BE-NEXT: vperm v2, v4, v4, v5 +; CHECK-BE-NEXT: vextsh2d v2, v2 +; CHECK-BE-NEXT: xvcvsxddp vs5, v2 +; CHECK-BE-NEXT: vperm v2, v4, v4, v0 +; 
CHECK-BE-NEXT: stxv vs4, 64(r3) ; CHECK-BE-NEXT: vextsh2d v2, v2 -; CHECK-BE-NEXT: xvcvsxddp vs4, v3 -; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: xvcvsxddp vs6, v2 -; CHECK-BE-NEXT: vperm v4, v4, v4, v3 -; CHECK-BE-NEXT: vperm v2, v1, v1, v3 -; CHECK-BE-NEXT: stxv vs6, 64(r3) -; CHECK-BE-NEXT: stxv vs4, 0(r3) -; CHECK-BE-NEXT: vextsh2d v4, v4 +; CHECK-BE-NEXT: vperm v2, v4, v4, v1 +; CHECK-BE-NEXT: stxv vs5, 80(r3) ; CHECK-BE-NEXT: vextsh2d v2, v2 -; CHECK-BE-NEXT: xvcvsxddp vs5, v4 ; CHECK-BE-NEXT: xvcvsxddp vs7, v2 -; CHECK-BE-NEXT: stxv vs7, 96(r3) -; CHECK-BE-NEXT: stxv vs5, 32(r3) +; CHECK-BE-NEXT: stxv vs6, 96(r3) +; CHECK-BE-NEXT: stxv vs7, 112(r3) ; CHECK-BE-NEXT: blr entry: %a = load <16 x i16>, <16 x i16>* %0, align 32 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll @@ -404,20 +404,19 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtvsrd v2, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI6_0@toc@ha -; CHECK-BE-NEXT: xxlxor v3, v3, v3 ; CHECK-BE-NEXT: addi r4, r4, .LCPI6_0@toc@l -; CHECK-BE-NEXT: lxv v4, 0(r4) +; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI6_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI6_1@toc@l -; CHECK-BE-NEXT: vperm v3, v3, v2, v4 +; CHECK-BE-NEXT: vperm v3, v2, v2, v3 ; CHECK-BE-NEXT: vextsb2w v3, v3 ; CHECK-BE-NEXT: xvcvsxwsp vs0, v3 ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: vperm v2, v2, v2, v3 -; CHECK-BE-NEXT: stxv vs0, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) ; CHECK-BE-NEXT: vextsb2w v2, v2 ; CHECK-BE-NEXT: xvcvsxwsp vs1, v2 -; CHECK-BE-NEXT: stxv vs1, 0(r3) +; CHECK-BE-NEXT: stxv vs1, 16(r3) ; CHECK-BE-NEXT: blr entry: %0 = bitcast i64 %a.coerce to <8 x i8> @@ -503,34 +502,33 @@ ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis r4, r2, .LCPI7_0@toc@ha -; CHECK-BE-NEXT: 
xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r4, r4, .LCPI7_0@toc@l ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI7_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI7_1@toc@l -; CHECK-BE-NEXT: vperm v3, v4, v2, v3 +; CHECK-BE-NEXT: vperm v3, v2, v2, v3 ; CHECK-BE-NEXT: vextsb2w v3, v3 ; CHECK-BE-NEXT: xvcvsxwsp vs0, v3 ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI7_2@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI7_2@toc@l -; CHECK-BE-NEXT: vperm v3, v4, v2, v3 -; CHECK-BE-NEXT: stxv vs0, 16(r3) +; CHECK-BE-NEXT: vperm v3, v2, v2, v3 +; CHECK-BE-NEXT: stxv vs0, 0(r3) ; CHECK-BE-NEXT: vextsb2w v3, v3 ; CHECK-BE-NEXT: xvcvsxwsp vs1, v3 ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI7_3@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI7_3@toc@l ; CHECK-BE-NEXT: vperm v3, v2, v2, v3 -; CHECK-BE-NEXT: stxv vs1, 48(r3) +; CHECK-BE-NEXT: stxv vs1, 16(r3) ; CHECK-BE-NEXT: vextsb2w v3, v3 ; CHECK-BE-NEXT: xvcvsxwsp vs2, v3 ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: vperm v2, v2, v2, v3 -; CHECK-BE-NEXT: stxv vs2, 0(r3) +; CHECK-BE-NEXT: stxv vs2, 32(r3) ; CHECK-BE-NEXT: vextsb2w v2, v2 ; CHECK-BE-NEXT: xvcvsxwsp vs3, v2 -; CHECK-BE-NEXT: stxv vs3, 32(r3) +; CHECK-BE-NEXT: stxv vs3, 48(r3) ; CHECK-BE-NEXT: blr entry: %0 = sitofp <16 x i8> %a to <16 x float> diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll @@ -492,20 +492,19 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtvsrwz v2, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI5_0@toc@ha -; CHECK-BE-NEXT: xxlxor v3, v3, v3 ; CHECK-BE-NEXT: addi r4, r4, .LCPI5_0@toc@l -; CHECK-BE-NEXT: lxv v4, 0(r4) +; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI5_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI5_1@toc@l -; CHECK-BE-NEXT: vperm v3, v3, v2, v4 +; CHECK-BE-NEXT: vperm v3, v2, v2, 
v3 ; CHECK-BE-NEXT: vextsb2d v3, v3 ; CHECK-BE-NEXT: xvcvsxddp vs0, v3 ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: vperm v2, v2, v2, v3 -; CHECK-BE-NEXT: stxv vs0, 16(r3) +; CHECK-BE-NEXT: stxv vs0, 0(r3) ; CHECK-BE-NEXT: vextsb2d v2, v2 ; CHECK-BE-NEXT: xvcvsxddp vs1, v2 -; CHECK-BE-NEXT: stxv vs1, 0(r3) +; CHECK-BE-NEXT: stxv vs1, 16(r3) ; CHECK-BE-NEXT: blr entry: %0 = bitcast i32 %a.coerce to <4 x i8> @@ -600,34 +599,33 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: mtvsrd v2, r4 ; CHECK-BE-NEXT: addis r4, r2, .LCPI6_0@toc@ha -; CHECK-BE-NEXT: xxlxor v4, v4, v4 ; CHECK-BE-NEXT: addi r4, r4, .LCPI6_0@toc@l ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI6_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI6_1@toc@l -; CHECK-BE-NEXT: vperm v3, v4, v2, v3 +; CHECK-BE-NEXT: vperm v3, v2, v2, v3 ; CHECK-BE-NEXT: vextsb2d v3, v3 ; CHECK-BE-NEXT: xvcvsxddp vs0, v3 ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI6_2@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI6_2@toc@l -; CHECK-BE-NEXT: vperm v3, v4, v2, v3 -; CHECK-BE-NEXT: stxv vs0, 16(r3) +; CHECK-BE-NEXT: vperm v3, v2, v2, v3 +; CHECK-BE-NEXT: stxv vs0, 0(r3) ; CHECK-BE-NEXT: vextsb2d v3, v3 ; CHECK-BE-NEXT: xvcvsxddp vs1, v3 ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI6_3@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI6_3@toc@l ; CHECK-BE-NEXT: vperm v3, v2, v2, v3 -; CHECK-BE-NEXT: stxv vs1, 48(r3) +; CHECK-BE-NEXT: stxv vs1, 16(r3) ; CHECK-BE-NEXT: vextsb2d v3, v3 ; CHECK-BE-NEXT: xvcvsxddp vs2, v3 ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: vperm v2, v2, v2, v3 -; CHECK-BE-NEXT: stxv vs2, 0(r3) +; CHECK-BE-NEXT: stxv vs2, 32(r3) ; CHECK-BE-NEXT: vextsb2d v2, v2 ; CHECK-BE-NEXT: xvcvsxddp vs3, v2 -; CHECK-BE-NEXT: stxv vs3, 32(r3) +; CHECK-BE-NEXT: stxv vs3, 48(r3) ; CHECK-BE-NEXT: blr entry: %0 = bitcast i64 %a.coerce to <8 x i8> @@ -787,62 +785,61 @@ ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: addis r4, r2, 
.LCPI7_0@toc@ha -; CHECK-BE-NEXT: xxlxor v3, v3, v3 ; CHECK-BE-NEXT: addi r4, r4, .LCPI7_0@toc@l -; CHECK-BE-NEXT: lxv v4, 0(r4) +; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI7_1@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI7_1@toc@l -; CHECK-BE-NEXT: vperm v4, v3, v2, v4 -; CHECK-BE-NEXT: vextsb2d v4, v4 -; CHECK-BE-NEXT: xvcvsxddp vs0, v4 -; CHECK-BE-NEXT: lxv v4, 0(r4) +; CHECK-BE-NEXT: vperm v3, v2, v2, v3 +; CHECK-BE-NEXT: vextsb2d v3, v3 +; CHECK-BE-NEXT: xvcvsxddp vs0, v3 +; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI7_2@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI7_2@toc@l -; CHECK-BE-NEXT: vperm v4, v3, v2, v4 -; CHECK-BE-NEXT: stxv vs0, 16(r3) -; CHECK-BE-NEXT: vextsb2d v4, v4 -; CHECK-BE-NEXT: xvcvsxddp vs1, v4 -; CHECK-BE-NEXT: lxv v4, 0(r4) +; CHECK-BE-NEXT: vperm v3, v2, v2, v3 +; CHECK-BE-NEXT: stxv vs0, 0(r3) +; CHECK-BE-NEXT: vextsb2d v3, v3 +; CHECK-BE-NEXT: xvcvsxddp vs1, v3 +; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI7_3@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI7_3@toc@l -; CHECK-BE-NEXT: vperm v4, v3, v2, v4 -; CHECK-BE-NEXT: stxv vs1, 48(r3) -; CHECK-BE-NEXT: vextsb2d v4, v4 -; CHECK-BE-NEXT: xvcvsxddp vs2, v4 -; CHECK-BE-NEXT: lxv v4, 0(r4) +; CHECK-BE-NEXT: vperm v3, v2, v2, v3 +; CHECK-BE-NEXT: stxv vs1, 16(r3) +; CHECK-BE-NEXT: vextsb2d v3, v3 +; CHECK-BE-NEXT: xvcvsxddp vs2, v3 +; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI7_4@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI7_4@toc@l -; CHECK-BE-NEXT: vperm v3, v3, v2, v4 -; CHECK-BE-NEXT: stxv vs2, 80(r3) +; CHECK-BE-NEXT: vperm v3, v2, v2, v3 +; CHECK-BE-NEXT: stxv vs2, 32(r3) ; CHECK-BE-NEXT: vextsb2d v3, v3 ; CHECK-BE-NEXT: xvcvsxddp vs3, v3 ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI7_5@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI7_5@toc@l ; CHECK-BE-NEXT: vperm v3, v2, v2, v3 -; CHECK-BE-NEXT: stxv vs3, 112(r3) +; CHECK-BE-NEXT: stxv vs3, 48(r3) ; CHECK-BE-NEXT: vextsb2d v3, v3 ; 
CHECK-BE-NEXT: xvcvsxddp vs4, v3 ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI7_6@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI7_6@toc@l ; CHECK-BE-NEXT: vperm v3, v2, v2, v3 -; CHECK-BE-NEXT: stxv vs4, 0(r3) +; CHECK-BE-NEXT: stxv vs4, 64(r3) ; CHECK-BE-NEXT: vextsb2d v3, v3 ; CHECK-BE-NEXT: xvcvsxddp vs5, v3 ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: addis r4, r2, .LCPI7_7@toc@ha ; CHECK-BE-NEXT: addi r4, r4, .LCPI7_7@toc@l ; CHECK-BE-NEXT: vperm v3, v2, v2, v3 -; CHECK-BE-NEXT: stxv vs5, 32(r3) +; CHECK-BE-NEXT: stxv vs5, 80(r3) ; CHECK-BE-NEXT: vextsb2d v3, v3 ; CHECK-BE-NEXT: xvcvsxddp vs6, v3 ; CHECK-BE-NEXT: lxv v3, 0(r4) ; CHECK-BE-NEXT: vperm v2, v2, v2, v3 -; CHECK-BE-NEXT: stxv vs6, 64(r3) +; CHECK-BE-NEXT: stxv vs6, 96(r3) ; CHECK-BE-NEXT: vextsb2d v2, v2 ; CHECK-BE-NEXT: xvcvsxddp vs7, v2 -; CHECK-BE-NEXT: stxv vs7, 96(r3) +; CHECK-BE-NEXT: stxv vs7, 112(r3) ; CHECK-BE-NEXT: blr entry: %0 = sitofp <16 x i8> %a to <16 x double> diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv32.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv32.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv32.ll @@ -2097,9 +2097,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv2i16_i16( %0, %1, %2, i32 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmset.m v8 -; CHECK-NEXT: vmand.mm v0, v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmor.mm v0, v9, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmsgeu.mask.nxv2i16.i16( @@ -2112,6 +2111,21 @@ ret %a } +define @intrinsic_vmsgeu_mask_vi_nxv2i16_i16_same_mask_maskedoff( %0, %1, i32 %2) nounwind { +; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv2i16_i16_same_mask_maskedoff: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmsgeu.mask.nxv2i16.i16( + %0, + %1, + i16 0, + %0, + i32 %2) + + ret %a +} + define 
@intrinsic_vmsgeu_vi_nxv4i16_i16( %0, i32 %1) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv64.ll --- a/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgeu-rv64.ll @@ -2064,9 +2064,8 @@ define @intrinsic_vmsgeu_mask_vi_nxv2i16_i16( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv2i16_i16: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, mu -; CHECK-NEXT: vmset.m v8 -; CHECK-NEXT: vmand.mm v0, v9, v8 +; CHECK-NEXT: vsetvli zero, a0, e8, mf4, ta, mu +; CHECK-NEXT: vmor.mm v0, v9, v0 ; CHECK-NEXT: ret entry: %a = call @llvm.riscv.vmsgeu.mask.nxv2i16.i16( @@ -2094,6 +2093,21 @@ ret %a } +define @intrinsic_vmsgeu_mask_vi_nxv2i16_i16_same_mask_maskedoff( %0, %1, i64 %2) nounwind { +; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv2i16_i16_same_mask_maskedoff: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: ret +entry: + %a = call @llvm.riscv.vmsgeu.mask.nxv2i16.i16( + %0, + %1, + i16 0, + %0, + i64 %2) + + ret %a +} + define @intrinsic_vmsgeu_mask_vi_nxv4i16_i16( %0, %1, %2, i64 %3) nounwind { ; CHECK-LABEL: intrinsic_vmsgeu_mask_vi_nxv4i16_i16: ; CHECK: # %bb.0: # %entry diff --git a/llvm/test/CodeGen/Thumb2/mve-vst3.ll b/llvm/test/CodeGen/Thumb2/mve-vst3.ll --- a/llvm/test/CodeGen/Thumb2/mve-vst3.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vst3.ll @@ -533,30 +533,27 @@ define void @vst3_v2i8(<2 x i8> *%src, <6 x i8> *%dst) { ; CHECK-LABEL: vst3_v2i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .pad #16 ; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: ldrb r2, [r0] -; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: mov r4, sp ; CHECK-NEXT: ldrb r3, [r0, #1] ; CHECK-NEXT: ldrb.w r12, [r0, #2] ; CHECK-NEXT: vmov q0[2], q0[0], r2, r3 ; CHECK-NEXT: 
ldrb.w lr, [r0, #3] -; CHECK-NEXT: vmov r4, s0 +; CHECK-NEXT: vmov r2, s0 ; CHECK-NEXT: ldrb r5, [r0, #5] -; CHECK-NEXT: vmov.16 q0[0], r4 +; CHECK-NEXT: vmov.16 q0[0], r2 ; CHECK-NEXT: ldrb r0, [r0, #4] ; CHECK-NEXT: vmov.16 q0[1], r12 -; CHECK-NEXT: mov r2, sp ; CHECK-NEXT: vmov.16 q0[2], r0 ; CHECK-NEXT: add r0, sp, #8 ; CHECK-NEXT: vmov.16 q0[3], r3 ; CHECK-NEXT: vmov.16 q0[4], lr ; CHECK-NEXT: vmov.16 q0[5], r5 -; CHECK-NEXT: vmov.16 q0[6], r6 -; CHECK-NEXT: vmov.16 q0[7], r6 -; CHECK-NEXT: vstrb.16 q0, [r2] +; CHECK-NEXT: vstrb.16 q0, [r4] ; CHECK-NEXT: vstrb.16 q0, [r0] ; CHECK-NEXT: vldrh.u32 q0, [r0] ; CHECK-NEXT: ldr r2, [sp] @@ -564,7 +561,7 @@ ; CHECK-NEXT: vmov r0, s2 ; CHECK-NEXT: strh r0, [r1, #4] ; CHECK-NEXT: add sp, #16 -; CHECK-NEXT: pop {r4, r5, r6, pc} +; CHECK-NEXT: pop {r4, r5, r7, pc} entry: %s1 = getelementptr <2 x i8>, <2 x i8>* %src, i32 0 %l1 = load <2 x i8>, <2 x i8>* %s1, align 4 diff --git a/llvm/test/CodeGen/VE/Packed/vec_load.ll b/llvm/test/CodeGen/VE/Packed/vec_load.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/Packed/vec_load.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s + +declare <512 x float> @llvm.masked.load.v512f32.p0v512f32(<512 x float>* %0, i32 immarg %1, <512 x i1> %2, <512 x float> %3) #0 + +; Function Attrs: nounwind +define fastcc <512 x float> @vec_mload_v512f32(<512 x float>* %P, <512 x i1> %M) { +; CHECK-LABEL: vec_mload_v512f32: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vldu %v0, 8, %s0 +; CHECK-NEXT: lea %s0, 4(, %s0) +; CHECK-NEXT: vldu %v1, 8, %s0 +; CHECK-NEXT: vshf %v0, %v1, %v0, 8 +; CHECK-NEXT: b.l.t (, %s10) + %r = call <512 x float> @llvm.masked.load.v512f32.p0v512f32(<512 x float>* %P, i32 16, <512 x i1> %M, <512 x float> undef) + ret <512 x float> %r +} + +; TODO: Packed select legalization +; Function Attrs: nounwind 
+; define fastcc <512 x float> @vec_mload_pt_v512f32(<512 x float>* %P, <512 x float> %PT, <512 x i1> %M) { +; %r = call <512 x float> @llvm.masked.load.v512f32.p0v512f32(<512 x float>* %P, i32 16, <512 x i1> %M, <512 x float> %PT) +; ret <512 x float> %r +; } + +declare <512 x i32> @llvm.masked.load.v512i32.p0v512i32(<512 x i32>* %0, i32 immarg %1, <512 x i1> %2, <512 x i32> %3) #0 + +; Function Attrs: nounwind +define fastcc <512 x i32> @vec_mload_v512i32(<512 x i32>* %P, <512 x i1> %M) { +; CHECK-LABEL: vec_mload_v512i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vldl.zx %v0, 8, %s0 +; CHECK-NEXT: lea %s0, 4(, %s0) +; CHECK-NEXT: vldl.zx %v1, 8, %s0 +; CHECK-NEXT: vshf %v0, %v1, %v0, 13 +; CHECK-NEXT: b.l.t (, %s10) + %r = call <512 x i32> @llvm.masked.load.v512i32.p0v512i32(<512 x i32>* %P, i32 16, <512 x i1> %M, <512 x i32> undef) + ret <512 x i32> %r +} + +; TODO: Packed select legalization +; ; Function Attrs: nounwind +; define fastcc <512 x i32> @vec_mload_pt_v512i32(<512 x i32>* %P, <512 x i32> %PT, <512 x i1> %M) { +; %r = call <512 x i32> @llvm.masked.load.v512i32.p0v512i32(<512 x i32>* %P, i32 16, <512 x i1> %M, <512 x i32> %PT) +; ret <512 x i32> %r +; } + +attributes #0 = { argmemonly nounwind readonly willreturn } diff --git a/llvm/test/CodeGen/VE/Packed/vec_store.ll b/llvm/test/CodeGen/VE/Packed/vec_store.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/VE/Packed/vec_store.ll @@ -0,0 +1,35 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=ve-unknown-unknown -mattr=+vpu | FileCheck %s + +declare void @llvm.masked.store.v512f32.p0v512f32(<512 x float>, <512 x float>*, i32 immarg, <512 x i1>) + +define fastcc void @vec_mstore_v512f32(<512 x float>* %P, <512 x float> %V, <512 x i1> %M) { +; CHECK-LABEL: vec_mstore_v512f32: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s1, 256 +; CHECK-NEXT: lvl %s1 +; CHECK-NEXT: vstu %v0, 8, %s0 +; CHECK-NEXT: 
vshf %v0, %v0, %v0, 4 +; CHECK-NEXT: lea %s0, 4(, %s0) +; CHECK-NEXT: vstu %v0, 8, %s0 +; CHECK-NEXT: b.l.t (, %s10) + call void @llvm.masked.store.v512f32.p0v512f32(<512 x float> %V, <512 x float>* %P, i32 16, <512 x i1> %M) + ret void +} + + +declare void @llvm.masked.store.v512i32.p0v512i32(<512 x i32>, <512 x i32>*, i32 immarg, <512 x i1>) + +define fastcc void @vec_mstore_v512i32(<512 x i32>* %P, <512 x i32> %V, <512 x i1> %M) { +; CHECK-LABEL: vec_mstore_v512i32: +; CHECK: # %bb.0: +; CHECK-NEXT: lea %s1, 4(, %s0) +; CHECK-NEXT: lea %s2, 256 +; CHECK-NEXT: lvl %s2 +; CHECK-NEXT: vstl %v0, 8, %s1 +; CHECK-NEXT: vshf %v0, %v0, %v0, 0 +; CHECK-NEXT: vstl %v0, 8, %s0 +; CHECK-NEXT: b.l.t (, %s10) + call void @llvm.masked.store.v512i32.p0v512i32(<512 x i32> %V, <512 x i32>* %P, i32 16, <512 x i1> %M) + ret void +} diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll --- a/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -4232,63 +4232,65 @@ declare <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64>, i32, <8 x i64>, i8) -define <8 x i64>@test_int_x86_avx512_mask_psrl_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) { +define { <8 x i64>, <8 x i64>, <8 x i64> }@test_int_x86_avx512_mask_psrl_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_psrl_qi_512: ; X86: ## %bb.0: +; X86-NEXT: vmovdqa64 %zmm1, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8] -; X86-NEXT: vpsrlq $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x04] -; X86-NEXT: vpsrlq $5, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xd0,0x05] -; X86-NEXT: vpsrlq $6, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x06] -; X86-NEXT: vpaddq %zmm0, 
%zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0] -; X86-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] +; X86-NEXT: vpsrlq $4, %zmm0, %zmm3 {%k1} ## encoding: [0x62,0xf1,0xe5,0x49,0x73,0xd0,0x04] +; X86-NEXT: vpsrlq $5, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0xf5,0x48,0x73,0xd0,0x05] +; X86-NEXT: vpsrlq $6, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x73,0xd0,0x06] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl ## encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psrl_qi_512: ; X64: ## %bb.0: +; X64-NEXT: vmovdqa64 %zmm1, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vpsrlq $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x04] -; X64-NEXT: vpsrlq $5, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xd0,0x05] -; X64-NEXT: vpsrlq $6, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x06] -; X64-NEXT: vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0] -; X64-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] +; X64-NEXT: vpsrlq $4, %zmm0, %zmm3 {%k1} ## encoding: [0x62,0xf1,0xe5,0x49,0x73,0xd0,0x04] +; X64-NEXT: vpsrlq $5, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0xf5,0x48,0x73,0xd0,0x05] +; X64-NEXT: vpsrlq $6, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0xed,0xc9,0x73,0xd0,0x06] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq ## encoding: [0xc3] - %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 4, <8 x i64> %x2, i8 %x3) + %res0 = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 4, <8 x i64> %x2, i8 %x3) %res1 = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 5, <8 x i64> %x2, i8 -1) %res2 = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 6, <8 x i64> 
zeroinitializer, i8 %x3) - %res3 = add <8 x i64> %res, %res1 - %res4 = add <8 x i64> %res3, %res2 - ret <8 x i64> %res4 + %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0 + %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res3, <8 x i64> %res1, 1 + %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res4, <8 x i64> %res2, 2 + ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5 } -declare <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32>, i32, <16 x i32>, i16) +declare <16 x i32>@llvm.x86.avx512.mask.psrl.di.512(<16 x i32>, i32, <16 x i32>, i16) -define <16 x i32>@test_int_x86_avx512_mask_psrl_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) { +define { <16 x i32>, <16 x i32>, <16 x i32> }@test_int_x86_avx512_mask_psrl_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_psrl_di_512: ; X86: ## %bb.0: +; X86-NEXT: vmovdqa64 %zmm1, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpsrld $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x04] -; X86-NEXT: vpsrld $5, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xd0,0x05] -; X86-NEXT: vpsrld $6, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x06] -; X86-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0] -; X86-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0] +; X86-NEXT: vpsrld $4, %zmm0, %zmm3 {%k1} ## encoding: [0x62,0xf1,0x65,0x49,0x72,0xd0,0x04] +; X86-NEXT: vpsrld $5, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0x72,0xd0,0x05] +; X86-NEXT: vpsrld $6, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xd0,0x06] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl ## encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psrl_di_512: ; X64: ## %bb.0: +; 
X64-NEXT: vmovdqa64 %zmm1, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] ; X64-NEXT: kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce] -; X64-NEXT: vpsrld $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x04] -; X64-NEXT: vpsrld $5, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xd0,0x05] -; X64-NEXT: vpsrld $6, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x06] -; X64-NEXT: vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0] -; X64-NEXT: vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0] +; X64-NEXT: vpsrld $4, %zmm0, %zmm3 {%k1} ## encoding: [0x62,0xf1,0x65,0x49,0x72,0xd0,0x04] +; X64-NEXT: vpsrld $5, %zmm0, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0x72,0xd0,0x05] +; X64-NEXT: vpsrld $6, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xc9,0x72,0xd0,0x06] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq ## encoding: [0xc3] - %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 4, <16 x i32> %x2, i16 %x3) + %res0 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 4, <16 x i32> %x2, i16 %x3) %res1 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 5, <16 x i32> %x2, i16 -1) %res2 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 6, <16 x i32> zeroinitializer, i16 %x3) - %res3 = add <16 x i32> %res, %res1 - %res4 = add <16 x i32> %res3, %res2 - ret <16 x i32> %res4 + %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0 + %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res3, <16 x i32> %res1, 1 + %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res4, <16 x i32> %res2, 2 + ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5 } declare <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32>, i32, <16 x i32>, i16) diff --git a/llvm/test/CodeGen/X86/avx512-rotate.ll 
b/llvm/test/CodeGen/X86/avx512-rotate.ll --- a/llvm/test/CodeGen/X86/avx512-rotate.ll +++ b/llvm/test/CodeGen/X86/avx512-rotate.ll @@ -12,230 +12,238 @@ ; Tests showing replacement of variable rotates with immediate splat versions. -define <16 x i32> @test_splat_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) { +define { <16 x i32>, <16 x i32>, <16 x i32> } @test_splat_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) { ; KNL-LABEL: test_splat_rol_v16i32: ; KNL: # %bb.0: +; KNL-NEXT: vmovdqa64 %zmm1, %zmm3 ; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: vprold $5, %zmm0, %zmm1 {%k1} -; KNL-NEXT: vprold $6, %zmm0, %zmm2 {%k1} {z} -; KNL-NEXT: vpaddd %zmm2, %zmm1, %zmm1 -; KNL-NEXT: vprold $7, %zmm0, %zmm0 -; KNL-NEXT: vpaddd %zmm0, %zmm1, %zmm0 +; KNL-NEXT: vprold $5, %zmm0, %zmm3 {%k1} +; KNL-NEXT: vprold $6, %zmm0, %zmm1 {%k1} {z} +; KNL-NEXT: vprold $7, %zmm0, %zmm2 +; KNL-NEXT: vmovdqa64 %zmm3, %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test_splat_rol_v16i32: ; SKX: # %bb.0: +; SKX-NEXT: vmovdqa64 %zmm1, %zmm3 ; SKX-NEXT: kmovd %edi, %k1 -; SKX-NEXT: vprold $5, %zmm0, %zmm1 {%k1} -; SKX-NEXT: vprold $6, %zmm0, %zmm2 {%k1} {z} -; SKX-NEXT: vpaddd %zmm2, %zmm1, %zmm1 -; SKX-NEXT: vprold $7, %zmm0, %zmm0 -; SKX-NEXT: vpaddd %zmm0, %zmm1, %zmm0 +; SKX-NEXT: vprold $5, %zmm0, %zmm3 {%k1} +; SKX-NEXT: vprold $6, %zmm0, %zmm1 {%k1} {z} +; SKX-NEXT: vprold $7, %zmm0, %zmm2 +; SKX-NEXT: vmovdqa64 %zmm3, %zmm0 ; SKX-NEXT: retq - %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> , <16 x i32> %x1, i16 %x2) + %res0 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> , <16 x i32> %x1, i16 %x2) %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> , <16 x i32> zeroinitializer, i16 %x2) %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> , <16 x i32> %x1, i16 -1) - %res3 = add <16 x i32> %res, %res1 - %res4 = add <16 x i32> %res3, %res2 - ret <16 x i32> %res4 + 
%res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0 + %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res3, <16 x i32> %res1, 1 + %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res4, <16 x i32> %res2, 2 + ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5 } -define <8 x i64>@test_splat_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) { +define { <8 x i64>, <8 x i64>, <8 x i64> } @test_splat_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) { ; KNL-LABEL: test_splat_rol_v8i64: ; KNL: # %bb.0: +; KNL-NEXT: vmovdqa64 %zmm1, %zmm3 ; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: vprolq $5, %zmm0, %zmm1 {%k1} -; KNL-NEXT: vprolq $6, %zmm0, %zmm2 {%k1} {z} -; KNL-NEXT: vpaddq %zmm2, %zmm1, %zmm1 -; KNL-NEXT: vprolq $7, %zmm0, %zmm0 -; KNL-NEXT: vpaddq %zmm0, %zmm1, %zmm0 +; KNL-NEXT: vprolq $5, %zmm0, %zmm3 {%k1} +; KNL-NEXT: vprolq $6, %zmm0, %zmm1 {%k1} {z} +; KNL-NEXT: vprolq $7, %zmm0, %zmm2 +; KNL-NEXT: vmovdqa64 %zmm3, %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test_splat_rol_v8i64: ; SKX: # %bb.0: +; SKX-NEXT: vmovdqa64 %zmm1, %zmm3 ; SKX-NEXT: kmovd %edi, %k1 -; SKX-NEXT: vprolq $5, %zmm0, %zmm1 {%k1} -; SKX-NEXT: vprolq $6, %zmm0, %zmm2 {%k1} {z} -; SKX-NEXT: vpaddq %zmm2, %zmm1, %zmm1 -; SKX-NEXT: vprolq $7, %zmm0, %zmm0 -; SKX-NEXT: vpaddq %zmm0, %zmm1, %zmm0 +; SKX-NEXT: vprolq $5, %zmm0, %zmm3 {%k1} +; SKX-NEXT: vprolq $6, %zmm0, %zmm1 {%k1} {z} +; SKX-NEXT: vprolq $7, %zmm0, %zmm2 +; SKX-NEXT: vmovdqa64 %zmm3, %zmm0 ; SKX-NEXT: retq - %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> , <8 x i64> %x1, i8 %x2) + %res0 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> , <8 x i64> %x1, i8 %x2) %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> , <8 x i64> zeroinitializer, i8 %x2) %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> , <8 x i64> %x1, i8 -1) - %res3 = add <8 x i64> %res, %res1 - %res4 = add 
<8 x i64> %res3, %res2 - ret <8 x i64> %res4 + %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0 + %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res3, <8 x i64> %res1, 1 + %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res4, <8 x i64> %res2, 2 + ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5 } -define <16 x i32> @test_splat_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) { +define { <16 x i32>, <16 x i32>, <16 x i32> } @test_splat_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) { ; KNL-LABEL: test_splat_ror_v16i32: ; KNL: # %bb.0: +; KNL-NEXT: vmovdqa64 %zmm1, %zmm3 ; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: vprord $5, %zmm0, %zmm1 {%k1} -; KNL-NEXT: vprord $6, %zmm0, %zmm2 {%k1} {z} -; KNL-NEXT: vpaddd %zmm2, %zmm1, %zmm1 -; KNL-NEXT: vprord $7, %zmm0, %zmm0 -; KNL-NEXT: vpaddd %zmm0, %zmm1, %zmm0 +; KNL-NEXT: vprord $5, %zmm0, %zmm3 {%k1} +; KNL-NEXT: vprord $6, %zmm0, %zmm1 {%k1} {z} +; KNL-NEXT: vprord $7, %zmm0, %zmm2 +; KNL-NEXT: vmovdqa64 %zmm3, %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test_splat_ror_v16i32: ; SKX: # %bb.0: +; SKX-NEXT: vmovdqa64 %zmm1, %zmm3 ; SKX-NEXT: kmovd %edi, %k1 -; SKX-NEXT: vprord $5, %zmm0, %zmm1 {%k1} -; SKX-NEXT: vprord $6, %zmm0, %zmm2 {%k1} {z} -; SKX-NEXT: vpaddd %zmm2, %zmm1, %zmm1 -; SKX-NEXT: vprord $7, %zmm0, %zmm0 -; SKX-NEXT: vpaddd %zmm0, %zmm1, %zmm0 +; SKX-NEXT: vprord $5, %zmm0, %zmm3 {%k1} +; SKX-NEXT: vprord $6, %zmm0, %zmm1 {%k1} {z} +; SKX-NEXT: vprord $7, %zmm0, %zmm2 +; SKX-NEXT: vmovdqa64 %zmm3, %zmm0 ; SKX-NEXT: retq - %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> , <16 x i32> %x1, i16 %x2) + %res0 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> , <16 x i32> %x1, i16 %x2) %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> , <16 x i32> zeroinitializer, i16 %x2) %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> , <16 x 
i32> %x1, i16 -1) - %res3 = add <16 x i32> %res, %res1 - %res4 = add <16 x i32> %res3, %res2 - ret <16 x i32> %res4 + %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0 + %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res3, <16 x i32> %res1, 1 + %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res4, <16 x i32> %res2, 2 + ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5 } -define <8 x i64>@test_splat_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) { +define { <8 x i64>, <8 x i64>, <8 x i64> } @test_splat_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) { ; KNL-LABEL: test_splat_ror_v8i64: ; KNL: # %bb.0: +; KNL-NEXT: vmovdqa64 %zmm1, %zmm3 ; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: vprorq $5, %zmm0, %zmm1 {%k1} -; KNL-NEXT: vprorq $6, %zmm0, %zmm2 {%k1} {z} -; KNL-NEXT: vpaddq %zmm2, %zmm1, %zmm1 -; KNL-NEXT: vprorq $7, %zmm0, %zmm0 -; KNL-NEXT: vpaddq %zmm0, %zmm1, %zmm0 +; KNL-NEXT: vprorq $5, %zmm0, %zmm3 {%k1} +; KNL-NEXT: vprorq $6, %zmm0, %zmm1 {%k1} {z} +; KNL-NEXT: vprorq $7, %zmm0, %zmm2 +; KNL-NEXT: vmovdqa64 %zmm3, %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test_splat_ror_v8i64: ; SKX: # %bb.0: +; SKX-NEXT: vmovdqa64 %zmm1, %zmm3 ; SKX-NEXT: kmovd %edi, %k1 -; SKX-NEXT: vprorq $5, %zmm0, %zmm1 {%k1} -; SKX-NEXT: vprorq $6, %zmm0, %zmm2 {%k1} {z} -; SKX-NEXT: vpaddq %zmm2, %zmm1, %zmm1 -; SKX-NEXT: vprorq $7, %zmm0, %zmm0 -; SKX-NEXT: vpaddq %zmm0, %zmm1, %zmm0 +; SKX-NEXT: vprorq $5, %zmm0, %zmm3 {%k1} +; SKX-NEXT: vprorq $6, %zmm0, %zmm1 {%k1} {z} +; SKX-NEXT: vprorq $7, %zmm0, %zmm2 +; SKX-NEXT: vmovdqa64 %zmm3, %zmm0 ; SKX-NEXT: retq - %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> , <8 x i64> %x1, i8 %x2) + %res0 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> , <8 x i64> %x1, i8 %x2) %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> , <8 x i64> zeroinitializer, i8 %x2) %res2 = call <8 x i64> 
@llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> , <8 x i64> %x1, i8 -1) - %res3 = add <8 x i64> %res, %res1 - %res4 = add <8 x i64> %res3, %res2 - ret <8 x i64> %res4 + %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0 + %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res3, <8 x i64> %res1, 1 + %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res4, <8 x i64> %res2, 2 + ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5 } ; Tests showing replacement of out-of-bounds variable rotates with in-bounds immediate splat versions. -define <16 x i32> @test_splat_bounds_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) { +define { <16 x i32>, <16 x i32>, <16 x i32> } @test_splat_bounds_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) { ; KNL-LABEL: test_splat_bounds_rol_v16i32: ; KNL: # %bb.0: +; KNL-NEXT: vmovdqa64 %zmm1, %zmm3 ; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: vprold $1, %zmm0, %zmm1 {%k1} -; KNL-NEXT: vprold $31, %zmm0, %zmm2 {%k1} {z} -; KNL-NEXT: vpaddd %zmm2, %zmm1, %zmm1 -; KNL-NEXT: vprold $30, %zmm0, %zmm0 -; KNL-NEXT: vpaddd %zmm0, %zmm1, %zmm0 +; KNL-NEXT: vprold $1, %zmm0, %zmm3 {%k1} +; KNL-NEXT: vprold $31, %zmm0, %zmm1 {%k1} {z} +; KNL-NEXT: vprold $30, %zmm0, %zmm2 +; KNL-NEXT: vmovdqa64 %zmm3, %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test_splat_bounds_rol_v16i32: ; SKX: # %bb.0: +; SKX-NEXT: vmovdqa64 %zmm1, %zmm3 ; SKX-NEXT: kmovd %edi, %k1 -; SKX-NEXT: vprold $1, %zmm0, %zmm1 {%k1} -; SKX-NEXT: vprold $31, %zmm0, %zmm2 {%k1} {z} -; SKX-NEXT: vpaddd %zmm2, %zmm1, %zmm1 -; SKX-NEXT: vprold $30, %zmm0, %zmm0 -; SKX-NEXT: vpaddd %zmm0, %zmm1, %zmm0 +; SKX-NEXT: vprold $1, %zmm0, %zmm3 {%k1} +; SKX-NEXT: vprold $31, %zmm0, %zmm1 {%k1} {z} +; SKX-NEXT: vprold $30, %zmm0, %zmm2 +; SKX-NEXT: vmovdqa64 %zmm3, %zmm0 ; SKX-NEXT: retq - %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> , <16 x i32> %x1, i16 %x2) + %res0 = call <16 x i32> 
@llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> , <16 x i32> %x1, i16 %x2) %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> , <16 x i32> zeroinitializer, i16 %x2) %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> , <16 x i32> %x1, i16 -1) - %res3 = add <16 x i32> %res, %res1 - %res4 = add <16 x i32> %res3, %res2 - ret <16 x i32> %res4 + %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0 + %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res3, <16 x i32> %res1, 1 + %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res4, <16 x i32> %res2, 2 + ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5 } -define <8 x i64>@test_splat_bounds_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) { +define { <8 x i64>, <8 x i64>, <8 x i64> } @test_splat_bounds_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) { ; KNL-LABEL: test_splat_bounds_rol_v8i64: ; KNL: # %bb.0: +; KNL-NEXT: vmovdqa64 %zmm1, %zmm3 ; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: vprolq $62, %zmm0, %zmm1 {%k1} -; KNL-NEXT: vprolq $1, %zmm0, %zmm2 {%k1} {z} -; KNL-NEXT: vpaddq %zmm2, %zmm1, %zmm1 -; KNL-NEXT: vprolq $63, %zmm0, %zmm0 -; KNL-NEXT: vpaddq %zmm0, %zmm1, %zmm0 +; KNL-NEXT: vprolq $62, %zmm0, %zmm3 {%k1} +; KNL-NEXT: vprolq $1, %zmm0, %zmm1 {%k1} {z} +; KNL-NEXT: vprolq $63, %zmm0, %zmm2 +; KNL-NEXT: vmovdqa64 %zmm3, %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test_splat_bounds_rol_v8i64: ; SKX: # %bb.0: +; SKX-NEXT: vmovdqa64 %zmm1, %zmm3 ; SKX-NEXT: kmovd %edi, %k1 -; SKX-NEXT: vprolq $62, %zmm0, %zmm1 {%k1} -; SKX-NEXT: vprolq $1, %zmm0, %zmm2 {%k1} {z} -; SKX-NEXT: vpaddq %zmm2, %zmm1, %zmm1 -; SKX-NEXT: vprolq $63, %zmm0, %zmm0 -; SKX-NEXT: vpaddq %zmm0, %zmm1, %zmm0 +; SKX-NEXT: vprolq $62, %zmm0, %zmm3 {%k1} +; SKX-NEXT: vprolq $1, %zmm0, %zmm1 {%k1} {z} +; SKX-NEXT: vprolq $63, %zmm0, %zmm2 +; SKX-NEXT: vmovdqa64 %zmm3, %zmm0 ; SKX-NEXT: retq - %res = call <8 x i64> 
@llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> , <8 x i64> %x1, i8 %x2) + %res0 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> , <8 x i64> %x1, i8 %x2) %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> , <8 x i64> zeroinitializer, i8 %x2) %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> , <8 x i64> %x1, i8 -1) - %res3 = add <8 x i64> %res, %res1 - %res4 = add <8 x i64> %res3, %res2 - ret <8 x i64> %res4 + %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0 + %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res3, <8 x i64> %res1, 1 + %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res4, <8 x i64> %res2, 2 + ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5 } -define <16 x i32> @test_splat_bounds_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) { +define { <16 x i32>, <16 x i32>, <16 x i32> } @test_splat_bounds_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) { ; KNL-LABEL: test_splat_bounds_ror_v16i32: ; KNL: # %bb.0: +; KNL-NEXT: vmovdqa64 %zmm1, %zmm3 ; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: vprord $1, %zmm0, %zmm1 {%k1} -; KNL-NEXT: vprord $31, %zmm0, %zmm2 {%k1} {z} -; KNL-NEXT: vpaddd %zmm2, %zmm1, %zmm1 -; KNL-NEXT: vprord $30, %zmm0, %zmm0 -; KNL-NEXT: vpaddd %zmm0, %zmm1, %zmm0 +; KNL-NEXT: vprord $1, %zmm0, %zmm3 {%k1} +; KNL-NEXT: vprord $31, %zmm0, %zmm1 {%k1} {z} +; KNL-NEXT: vprord $30, %zmm0, %zmm2 +; KNL-NEXT: vmovdqa64 %zmm3, %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test_splat_bounds_ror_v16i32: ; SKX: # %bb.0: +; SKX-NEXT: vmovdqa64 %zmm1, %zmm3 ; SKX-NEXT: kmovd %edi, %k1 -; SKX-NEXT: vprord $1, %zmm0, %zmm1 {%k1} -; SKX-NEXT: vprord $31, %zmm0, %zmm2 {%k1} {z} -; SKX-NEXT: vpaddd %zmm2, %zmm1, %zmm1 -; SKX-NEXT: vprord $30, %zmm0, %zmm0 -; SKX-NEXT: vpaddd %zmm0, %zmm1, %zmm0 +; SKX-NEXT: vprord $1, %zmm0, %zmm3 {%k1} +; SKX-NEXT: vprord $31, %zmm0, %zmm1 {%k1} {z} +; SKX-NEXT: vprord $30, %zmm0, 
%zmm2 +; SKX-NEXT: vmovdqa64 %zmm3, %zmm0 ; SKX-NEXT: retq - %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> , <16 x i32> %x1, i16 %x2) + %res0 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> , <16 x i32> %x1, i16 %x2) %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> , <16 x i32> zeroinitializer, i16 %x2) %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> , <16 x i32> %x1, i16 -1) - %res3 = add <16 x i32> %res, %res1 - %res4 = add <16 x i32> %res3, %res2 - ret <16 x i32> %res4 + %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0 + %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res3, <16 x i32> %res1, 1 + %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } %res4, <16 x i32> %res2, 2 + ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5 } -define <8 x i64>@test_splat_bounds_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) { +define { <8 x i64>, <8 x i64>, <8 x i64> } @test_splat_bounds_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) { ; KNL-LABEL: test_splat_bounds_ror_v8i64: ; KNL: # %bb.0: +; KNL-NEXT: vmovdqa64 %zmm1, %zmm3 ; KNL-NEXT: kmovw %edi, %k1 -; KNL-NEXT: vprorq $62, %zmm0, %zmm1 {%k1} -; KNL-NEXT: vprorq $1, %zmm0, %zmm2 {%k1} {z} -; KNL-NEXT: vpaddq %zmm2, %zmm1, %zmm1 -; KNL-NEXT: vprorq $63, %zmm0, %zmm0 -; KNL-NEXT: vpaddq %zmm0, %zmm1, %zmm0 +; KNL-NEXT: vprorq $62, %zmm0, %zmm3 {%k1} +; KNL-NEXT: vprorq $1, %zmm0, %zmm1 {%k1} {z} +; KNL-NEXT: vprorq $63, %zmm0, %zmm2 +; KNL-NEXT: vmovdqa64 %zmm3, %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: test_splat_bounds_ror_v8i64: ; SKX: # %bb.0: +; SKX-NEXT: vmovdqa64 %zmm1, %zmm3 ; SKX-NEXT: kmovd %edi, %k1 -; SKX-NEXT: vprorq $62, %zmm0, %zmm1 {%k1} -; SKX-NEXT: vprorq $1, %zmm0, %zmm2 {%k1} {z} -; SKX-NEXT: vpaddq %zmm2, %zmm1, %zmm1 -; SKX-NEXT: vprorq $63, %zmm0, %zmm0 -; SKX-NEXT: vpaddq %zmm0, %zmm1, %zmm0 +; SKX-NEXT: 
vprorq $62, %zmm0, %zmm3 {%k1} +; SKX-NEXT: vprorq $1, %zmm0, %zmm1 {%k1} {z} +; SKX-NEXT: vprorq $63, %zmm0, %zmm2 +; SKX-NEXT: vmovdqa64 %zmm3, %zmm0 ; SKX-NEXT: retq - %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> , <8 x i64> %x1, i8 %x2) + %res0 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> , <8 x i64> %x1, i8 %x2) %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> , <8 x i64> zeroinitializer, i8 %x2) %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> , <8 x i64> %x1, i8 -1) - %res3 = add <8 x i64> %res, %res1 - %res4 = add <8 x i64> %res3, %res2 - ret <8 x i64> %res4 + %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0 + %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res3, <8 x i64> %res1, 1 + %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } %res4, <8 x i64> %res2, 2 + ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5 } ; Constant folding diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll @@ -46,61 +46,60 @@ declare <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8, <64 x i8>, i64) -define <64 x i8> @test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) nounwind { +define { <64 x i8>, <64 x i8>, <64 x i8> } @test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) nounwind { ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512: ; X86: # %bb.0: ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0x4c,0x24,0x04] -; X86-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0xc1] -; X86-NEXT: vmovdqu8 %zmm1, 
%zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0xd1] -; X86-NEXT: vpaddb %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc2] -; X86-NEXT: vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0] +; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0x5c,0x24,0x04] +; X86-NEXT: vpblendmb %zmm3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x66,0xcb] +; X86-NEXT: vmovdqu8 %zmm3, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0xd3] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512: ; X64: # %bb.0: -; X64-NEXT: vpbroadcastb %edi, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7a,0xcf] +; X64-NEXT: vmovdqa64 %zmm0, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc8] +; X64-NEXT: vpbroadcastb %edi, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x7a,0xc7] ; X64-NEXT: kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce] -; X64-NEXT: vpbroadcastb %edi, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7a,0xc7] +; X64-NEXT: vpbroadcastb %edi, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7a,0xcf] ; X64-NEXT: vpbroadcastb %edi, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7a,0xd7] -; X64-NEXT: vpaddb %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc2] -; X64-NEXT: vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 -1) %res1 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 %mask) %res2 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> zeroinitializer, i64 %mask) - %res3 = add <64 x i8> %res, %res1 - %res4 = add <64 x i8> %res2, %res3 - ret <64 x i8> %res4 - } + %res3 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } poison, <64 x i8> %res, 0 + %res4 = 
insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %res3, <64 x i8> %res1, 1 + %res5 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %res4, <64 x i8> %res2, 2 + ret { <64 x i8>, <64 x i8>, <64 x i8> } %res5 +} declare <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16, <32 x i16>, i32) -define <32 x i16> @test_int_x86_avx512_mask_pbroadcast_w_gpr_512(i16 %x0, <32 x i16> %x1, i32 %mask) nounwind { + +define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_pbroadcast_w_gpr_512(i16 %x0, <32 x i16> %x1, i32 %mask) nounwind { ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512: ; X86: # %bb.0: -; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x4c,0x24,0x02] +; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x5c,0x24,0x02] ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0xc1] -; X86-NEXT: vmovdqu16 %zmm1, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0xd1] -; X86-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2] -; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] +; X86-NEXT: vpblendmw %zmm3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x66,0xcb] +; X86-NEXT: vmovdqu16 %zmm3, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0xd3] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512: ; X64: # %bb.0: -; X64-NEXT: vpbroadcastw %edi, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7b,0xcf] +; X64-NEXT: vmovdqa64 %zmm0, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc8] +; X64-NEXT: vpbroadcastw %edi, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x7b,0xc7] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpbroadcastw %edi, %zmm0 {%k1} # 
encoding: [0x62,0xf2,0x7d,0x49,0x7b,0xc7] +; X64-NEXT: vpbroadcastw %edi, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7b,0xcf] ; X64-NEXT: vpbroadcastw %edi, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7b,0xd7] -; X64-NEXT: vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2] -; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] -; X64-NEXT: retq # encoding: [0xc3] - %res = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 -1) - %res1 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 %mask) - %res2 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> zeroinitializer, i32 %mask) - %res3 = add <32 x i16> %res, %res1 - %res4 = add <32 x i16> %res2, %res3 - ret <32 x i16> %res4 +; X64-NEXT: retq # encoding: [0xc3] + %res = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 -1) + %res1 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 %mask) + %res2 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> zeroinitializer, i32 %mask) + %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0 + %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res3, <32 x i16> %res1, 1 + %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2 + ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5 } declare void @llvm.x86.avx512.mask.storeu.b.512(i8*, <64 x i8>, i64) @@ -155,77 +154,78 @@ declare <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8*, <32 x i16>, i32) -define <32 x i16> @test_int_x86_avx512_mask_loadu_w_512(i8* %ptr, i8* %ptr2, <32 x i16> %x1, i32 %mask) nounwind { +define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_loadu_w_512(i8* %ptr, i8* %ptr2, <32 x i16> %x1, i32 %mask) nounwind { ; X86-LABEL: 
test_int_x86_avx512_mask_loadu_w_512: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] ; X86-NEXT: vmovdqu64 (%ecx), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01] ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c] -; X86-NEXT: vmovdqu16 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0x00] -; X86-NEXT: vmovdqu16 (%ecx), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0x09] -; X86-NEXT: vpaddw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1] +; X86-NEXT: vpblendmw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x66,0x08] +; X86-NEXT: vmovdqu16 (%ecx), %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0x11] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_loadu_w_512: ; X64: # %bb.0: ; X64-NEXT: vmovdqu64 (%rdi), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07] ; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] -; X64-NEXT: vmovdqu16 (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0x06] -; X64-NEXT: vmovdqu16 (%rdi), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0x0f] -; X64-NEXT: vpaddw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1] +; X64-NEXT: vpblendmw (%rsi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x66,0x0e] +; X64-NEXT: vmovdqu16 (%rdi), %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0x17] ; X64-NEXT: retq # encoding: [0xc3] - %res0 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> %x1, i32 -1) - %res = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr2, <32 x i16> %res0, i32 %mask) - %res1 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> zeroinitializer, i32 %mask) - %res2 = add <32 x i16> %res, %res1 - ret <32 x i16> %res2 + %res = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> %x1, i32 
-1) + %res1 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr2, <32 x i16> %res, i32 %mask) + %res2 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> zeroinitializer, i32 %mask) + %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0 + %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res3, <32 x i16> %res1, 1 + %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2 + ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5 } declare <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8*, <64 x i8>, i64) -define <64 x i8> @test_int_x86_avx512_mask_loadu_b_512(i8* %ptr, i8* %ptr2, <64 x i8> %x1, i64 %mask) nounwind { +define { <64 x i8>, <64 x i8>, <64 x i8> } @test_int_x86_avx512_mask_loadu_b_512(i8* %ptr, i8* %ptr2, <64 x i8> %x1, i64 %mask) nounwind { ; X86-LABEL: test_int_x86_avx512_mask_loadu_b_512: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c] ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] ; X86-NEXT: vmovdqu64 (%ecx), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01] -; X86-NEXT: vmovdqu8 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0x00] -; X86-NEXT: vmovdqu8 (%ecx), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0x09] -; X86-NEXT: vpaddb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1] +; X86-NEXT: vpblendmb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x66,0x08] +; X86-NEXT: vmovdqu8 (%ecx), %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0x11] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_loadu_b_512: ; X64: # %bb.0: ; X64-NEXT: vmovdqu64 (%rdi), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07] ; X64-NEXT: kmovq %rdx, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xca] -; X64-NEXT: vmovdqu8 (%rsi), %zmm0 {%k1} # encoding: 
[0x62,0xf1,0x7f,0x49,0x6f,0x06] -; X64-NEXT: vmovdqu8 (%rdi), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0x0f] -; X64-NEXT: vpaddb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1] +; X64-NEXT: vpblendmb (%rsi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x66,0x0e] +; X64-NEXT: vmovdqu8 (%rdi), %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0x17] ; X64-NEXT: retq # encoding: [0xc3] - %res0 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> %x1, i64 -1) - %res = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr2, <64 x i8> %res0, i64 %mask) - %res1 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> zeroinitializer, i64 %mask) - %res2 = add <64 x i8> %res, %res1 - ret <64 x i8> %res2 + %res = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> %x1, i64 -1) + %res1 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr2, <64 x i8> %res, i64 %mask) + %res2 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> zeroinitializer, i64 %mask) + %res3 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } poison, <64 x i8> %res, 0 + %res4 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %res3, <64 x i8> %res1, 1 + %res5 = insertvalue { <64 x i8>, <64 x i8>, <64 x i8> } %res4, <64 x i8> %res2, 2 + ret { <64 x i8>, <64 x i8>, <64 x i8> } %res5 } declare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32) -define <8 x i64> @test_int_x86_avx512_psll_dq_512(<8 x i64> %x0) nounwind { +define { <8 x i64>, <8 x i64> } @test_int_x86_avx512_psll_dq_512(<8 x i64> %x0) nounwind { ; CHECK-LABEL: test_int_x86_avx512_psll_dq_512: ; CHECK: # %bb.0: -; CHECK-NEXT: vpslldq $8, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x73,0xf8,0x08] -; CHECK-NEXT: # zmm1 = 
zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55] -; CHECK-NEXT: vpslldq $4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0xf8,0x04] -; CHECK-NEXT: # zmm0 = zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55,56,57,58,59] -; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] +; CHECK-NEXT: vpslldq $8, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x73,0xf8,0x08] +; CHECK-NEXT: # zmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55] +; CHECK-NEXT: vpslldq $4, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x73,0xf8,0x04] +; CHECK-NEXT: # zmm1 = zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55,56,57,58,59] +; CHECK-NEXT: vmovapd %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8) %res1 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4) - %res2 = add <8 x i64> %res, %res1 - ret <8 x i64> %res2 + %res2 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %res, 0 + %res3 = insertvalue { <8 x i64>, <8 x i64> } %res2, <8 x i64> %res1, 1 + ret { <8 x i64>, <8 x i64> } %res3 } define <8 x i64> 
@test_int_x86_avx512_psll_load_dq_512(<8 x i64>* %p0) nounwind { @@ -248,19 +248,20 @@ declare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32) -define <8 x i64> @test_int_x86_avx512_psrl_dq_512(<8 x i64> %x0) nounwind { +define { <8 x i64>, <8 x i64> } @test_int_x86_avx512_psrl_dq_512(<8 x i64> %x0) nounwind { ; CHECK-LABEL: test_int_x86_avx512_psrl_dq_512: ; CHECK: # %bb.0: -; CHECK-NEXT: vpsrldq $8, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x73,0xd8,0x08] -; CHECK-NEXT: # zmm1 = zmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[40,41,42,43,44,45,46,47],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[56,57,58,59,60,61,62,63],zero,zero,zero,zero,zero,zero,zero,zero -; CHECK-NEXT: vpsrldq $4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0xd8,0x04] -; CHECK-NEXT: # zmm0 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zmm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zmm0[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,zmm0[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero -; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] +; CHECK-NEXT: vpsrldq $8, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x73,0xd8,0x08] +; CHECK-NEXT: # zmm2 = zmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[40,41,42,43,44,45,46,47],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[56,57,58,59,60,61,62,63],zero,zero,zero,zero,zero,zero,zero,zero +; CHECK-NEXT: vpsrldq $4, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x73,0xd8,0x04] +; CHECK-NEXT: # zmm1 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zmm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zmm0[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,zmm0[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero +; CHECK-NEXT: vmovapd %zmm2, %zmm0 
# encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc2] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8) %res1 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4) - %res2 = add <8 x i64> %res, %res1 - ret <8 x i64> %res2 + %res2 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %res, 0 + %res3 = insertvalue { <8 x i64>, <8 x i64> } %res2, <8 x i64> %res1, 1 + ret { <8 x i64>, <8 x i64> } %res3 } define <8 x i64> @test_int_x86_avx512_psrl_load_dq_512(<8 x i64>* %p0) nounwind { @@ -1089,32 +1090,33 @@ declare <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16>, i32, <32 x i16>, i32) -define <32 x i16> @test_int_x86_avx512_mask_psrl_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) nounwind { +define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_psrl_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) nounwind { ; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_512: ; X86: # %bb.0: +; X86-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpsrlw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x03] -; X86-NEXT: vpsrlw $4, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xd0,0x04] -; X86-NEXT: vpsrlw $5, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x05] -; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] -; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] +; X86-NEXT: vpsrlw $3, %zmm0, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0x71,0xd0,0x03] +; X86-NEXT: vpsrlw $4, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x71,0xd0,0x04] +; X86-NEXT: vpsrlw $5, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xd0,0x05] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: 
[0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_512: ; X64: # %bb.0: +; X64-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsrlw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x03] -; X64-NEXT: vpsrlw $4, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xd0,0x04] -; X64-NEXT: vpsrlw $5, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x05] -; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] -; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] +; X64-NEXT: vpsrlw $3, %zmm0, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0x71,0xd0,0x03] +; X64-NEXT: vpsrlw $4, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x71,0xd0,0x04] +; X64-NEXT: vpsrlw $5, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xd0,0x05] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 4, <32 x i16> %x2, i32 -1) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 5, <32 x i16> zeroinitializer, i32 %x3) - %res3 = add <32 x i16> %res, %res1 - %res4 = add <32 x i16> %res3, %res2 - ret <32 x i16> %res4 + %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0 + %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res3, <32 x i16> %res1, 1 + %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2 + ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5 } declare <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32) @@ -1164,32 +1166,33 @@ declare <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16>, i32, <32 x i16>, i32) 
-define <32 x i16> @test_int_x86_avx512_mask_psra_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) nounwind { +define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_psra_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) nounwind { ; X86-LABEL: test_int_x86_avx512_mask_psra_wi_512: ; X86: # %bb.0: +; X86-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpsraw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x03] -; X86-NEXT: vpsraw $4, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xe0,0x04] -; X86-NEXT: vpsraw $5, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xe0,0x05] -; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] -; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] +; X86-NEXT: vpsraw $3, %zmm0, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0x71,0xe0,0x03] +; X86-NEXT: vpsraw $4, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xc9,0x71,0xe0,0x04] +; X86-NEXT: vpsraw $5, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xe0,0x05] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psra_wi_512: ; X64: # %bb.0: +; X64-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsraw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x03] -; X64-NEXT: vpsraw $4, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xe0,0x04] -; X64-NEXT: vpsraw $5, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xe0,0x05] -; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] -; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] +; X64-NEXT: 
vpsraw $3, %zmm0, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0x71,0xe0,0x03] +; X64-NEXT: vpsraw $4, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xc9,0x71,0xe0,0x04] +; X64-NEXT: vpsraw $5, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xe0,0x05] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %res = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 4, <32 x i16> zeroinitializer, i32 %x3) %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 5, <32 x i16> %x2, i32 -1) - %res3 = add <32 x i16> %res, %res1 - %res4 = add <32 x i16> %res3, %res2 - ret <32 x i16> %res4 + %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0 + %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res3, <32 x i16> %res1, 1 + %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2 + ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5 } declare <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32) @@ -1239,32 +1242,33 @@ declare <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16>, i32, <32 x i16>, i32) -define <32 x i16> @test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) nounwind { +define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) nounwind { ; X86-LABEL: test_int_x86_avx512_mask_psll_wi_512: ; X86: # %bb.0: +; X86-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpsllw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x03] -; X86-NEXT: vpsllw $4, %zmm0, %zmm2 {%k1} {z} # encoding: 
[0x62,0xf1,0x6d,0xc9,0x71,0xf0,0x04] -; X86-NEXT: vpsllw $5, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xf0,0x05] -; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] -; X86-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] +; X86-NEXT: vpsllw $3, %zmm0, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0x71,0xf0,0x03] +; X86-NEXT: vpsllw $4, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xc9,0x71,0xf0,0x04] +; X86-NEXT: vpsllw $5, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xf0,0x05] +; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psll_wi_512: ; X64: # %bb.0: +; X64-NEXT: vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsllw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x03] -; X64-NEXT: vpsllw $4, %zmm0, %zmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xc9,0x71,0xf0,0x04] -; X64-NEXT: vpsllw $5, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xf0,0x05] -; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] -; X64-NEXT: vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0] +; X64-NEXT: vpsllw $3, %zmm0, %zmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x49,0x71,0xf0,0x03] +; X64-NEXT: vpsllw $4, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xc9,0x71,0xf0,0x04] +; X64-NEXT: vpsllw $5, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xf0,0x05] +; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %res = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3) %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 4, <32 x i16> zeroinitializer, i32 %x3) %res2 = call <32 x i16> 
@llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 5, <32 x i16> %x2, i32 -1) - %res3 = add <32 x i16> %res, %res1 - %res4 = add <32 x i16> %res3, %res2 - ret <32 x i16> %res4 + %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0 + %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res3, <32 x i16> %res1, 1 + %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2 + ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5 } declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) @@ -3003,32 +3007,35 @@ declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32) -define <32 x i16> @test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) nounwind { +define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) nounwind { ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_512: ; X86: # %bb.0: +; X86-NEXT: vmovdqa64 %zmm2, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe2] ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02] +; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xe1,0x02] ; X86-NEXT: vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03] -; X86-NEXT: vdbpsadbw $4, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xc1,0x04] -; X86-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0] -; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] +; X86-NEXT: vdbpsadbw $4, %zmm1, %zmm0, %zmm2 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd1,0x04] +; X86-NEXT: vmovdqa64 %zmm4, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc4] +; X86-NEXT: vmovdqa64 %zmm3, 
%zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_512: ; X64: # %bb.0: +; X64-NEXT: vmovdqa64 %zmm2, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe2] ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02] +; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xe1,0x02] ; X64-NEXT: vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03] -; X64-NEXT: vdbpsadbw $4, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xc1,0x04] -; X64-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0] -; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] +; X64-NEXT: vdbpsadbw $4, %zmm1, %zmm0, %zmm2 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd1,0x04] +; X64-NEXT: vmovdqa64 %zmm4, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc4] +; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4) %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 3, <32 x i16> zeroinitializer, i32 %x4) %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 4, <32 x i16> %x3, i32 -1) - %res3 = add <32 x i16> %res, %res1 - %res4 = add <32 x i16> %res3, %res2 - ret <32 x i16> %res4 + %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %res, 0 + %res4 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res3, <32 x i16> %res1, 1 + %res5 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res4, <32 x i16> %res2, 2 + ret { <32 x i16>, <32 x i16>, <32 x i16> } %res5 } define <32 x i16> 
@test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) nounwind { diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -1238,25 +1238,27 @@ declare <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8>, <64 x i8>, i32) -define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) { +define { <32 x i16>, <32 x i16>, <32 x i16> } @test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) { ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_512: ; X86: # %bb.0: +; X86-NEXT: vmovdqa64 %zmm2, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe2] ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02] +; X86-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xe1,0x02] ; X86-NEXT: vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03] -; X86-NEXT: vdbpsadbw $4, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xc1,0x04] -; X86-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0] -; X86-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] +; X86-NEXT: vdbpsadbw $4, %zmm1, %zmm0, %zmm2 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd1,0x04] +; X86-NEXT: vmovdqa64 %zmm4, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc4] +; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_512: ; X64: # %bb.0: +; X64-NEXT: vmovdqa64 %zmm2, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe2] ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm2 
{%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02] +; X64-NEXT: vdbpsadbw $2, %zmm1, %zmm0, %zmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xe1,0x02] ; X64-NEXT: vdbpsadbw $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xd9,0x03] -; X64-NEXT: vdbpsadbw $4, %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xc1,0x04] -; X64-NEXT: vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0] -; X64-NEXT: vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] +; X64-NEXT: vdbpsadbw $4, %zmm1, %zmm0, %zmm2 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd1,0x04] +; X64-NEXT: vmovdqa64 %zmm4, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc4] +; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %1 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2) %2 = bitcast i32 %x4 to <32 x i1> @@ -1265,24 +1267,26 @@ %5 = bitcast i32 %x4 to <32 x i1> %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer %7 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 4) - %res3 = add <32 x i16> %3, %6 - %res4 = add <32 x i16> %res3, %7 - ret <32 x i16> %res4 + %res1 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } poison, <32 x i16> %3, 0 + %res2 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res1, <32 x i16> %6, 1 + %res3 = insertvalue { <32 x i16>, <32 x i16>, <32 x i16> } %res2, <32 x i16> %7, 2 + ret { <32 x i16>, <32 x i16>, <32 x i16> } %res3 } declare <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>) -define <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){ +define { <8 x i64>, <8 x i64> } @test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){ ; CHECK-LABEL: test_int_x86_avx512_mask_psadb_w_512: ; CHECK: # %bb.0: -; CHECK-NEXT: vpsadbw %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xc9] 
-; CHECK-NEXT: vpsadbw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xc2] -; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0] +; CHECK-NEXT: vpsadbw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xd9] +; CHECK-NEXT: vpsadbw %zmm2, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xca] +; CHECK-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] ; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] - %res = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1) + %res0 = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1) %res1 = call <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2) - %res2 = add <8 x i64> %res, %res1 - ret <8 x i64> %res2 + %res2 = insertvalue { <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0 + %res3 = insertvalue { <8 x i64>, <8 x i64> } %res2, <8 x i64> %res1, 1 + ret { <8 x i64>, <8 x i64> } %res3 } declare <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16>, <32 x i16>) nounwind readnone diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll @@ -4,127 +4,123 @@ declare <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8, <16 x i8>, i16) -define <16 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8> %x1, i16 %mask) { +define { <16 x i8>, <16 x i8>, <16 x i8> } @test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8> %x1, i16 %mask) { ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128: ; X86: # %bb.0: -; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0x4c,0x24,0x04] +; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0x5c,0x24,0x04] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # 
encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x6f,0xc1] -; X86-NEXT: vmovdqu8 %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0xd1] -; X86-NEXT: vpaddb %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2] -; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] +; X86-NEXT: vpblendmb %xmm3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x66,0xcb] +; X86-NEXT: vmovdqu8 %xmm3, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0xd3] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128: ; X64: # %bb.0: -; X64-NEXT: vpbroadcastb %edi, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xcf] +; X64-NEXT: vmovdqa %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc8] +; X64-NEXT: vpbroadcastb %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc7] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpbroadcastb %edi, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7] +; X64-NEXT: vpbroadcastb %edi, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xcf] ; X64-NEXT: vpbroadcastb %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xd7] -; X64-NEXT: vpaddb %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2] -; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 -1) + %res0 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 -1) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 %mask) %res2 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> 
zeroinitializer, i16 %mask) - %res3 = add <16 x i8> %res, %res1 - %res4 = add <16 x i8> %res2, %res3 - ret <16 x i8> %res4 + %res3 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } poison, <16 x i8> %res0, 0 + %res4 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res3, <16 x i8> %res1, 1 + %res5 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res4, <16 x i8> %res2, 2 + ret { <16 x i8>, <16 x i8>, <16 x i8> } %res5 } declare <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16, <8 x i16>, i8) -define <8 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16> %x1, i8 %mask) { +define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16> %x1, i8 %mask) { ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128: ; X86: # %bb.0: -; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0x4c,0x24,0x04] +; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0x5c,0x24,0x04] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0xc1] -; X86-NEXT: vmovdqu16 %xmm1, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0xd1] -; X86-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] -; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] +; X86-NEXT: vpblendmw %xmm3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x66,0xcb] +; X86-NEXT: vmovdqu16 %xmm3, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0xd3] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128: ; X64: # %bb.0: -; X64-NEXT: vpbroadcastw %edi, %xmm1 # 
encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xcf] +; X64-NEXT: vmovdqa %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc8] +; X64-NEXT: vpbroadcastw %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc7] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpbroadcastw %edi, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7] +; X64-NEXT: vpbroadcastw %edi, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xcf] ; X64-NEXT: vpbroadcastw %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xd7] -; X64-NEXT: vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2] -; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 -1) + %res0 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 -1) %res1 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 %mask) %res2 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> zeroinitializer, i8 %mask) - %res3 = add <8 x i16> %res, %res1 - %res4 = add <8 x i16> %res2, %res3 - ret <8 x i16> %res4 + %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0 + %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1 + %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2 + ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5 } declare <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8, <32 x i8>, i32) -define <32 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_256(i8 %x0, <32 x i8> %x1, i32 %mask) { +define { <32 x i8>, <32 x i8>, <32 x i8> } @test_int_x86_avx512_mask_pbroadcast_b_gpr_256(i8 %x0, <32 x i8> %x1, i32 %mask) { ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256: ; X86: # %bb.0: -; X86-NEXT: vpbroadcastb 
{{[0-9]+}}(%esp), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0x4c,0x24,0x04] +; X86-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0x5c,0x24,0x04] ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqu8 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x6f,0xc1] -; X86-NEXT: vmovdqu8 %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0xd1] -; X86-NEXT: vpaddb %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc2] -; X86-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0] +; X86-NEXT: vpblendmb %ymm3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x66,0xcb] +; X86-NEXT: vmovdqu8 %ymm3, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0xd3] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256: ; X64: # %bb.0: -; X64-NEXT: vpbroadcastb %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xcf] +; X64-NEXT: vmovdqa %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc8] +; X64-NEXT: vpbroadcastb %edi, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xc7] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpbroadcastb %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7] +; X64-NEXT: vpbroadcastb %edi, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xcf] ; X64-NEXT: vpbroadcastb %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xd7] -; X64-NEXT: vpaddb %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc2] -; X64-NEXT: vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 -1) + %res0 = call 
<32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 -1) %res1 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 %mask) %res2 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> zeroinitializer, i32 %mask) - %res3 = add <32 x i8> %res, %res1 - %res4 = add <32 x i8> %res2, %res3 - ret <32 x i8> %res4 + %res3 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } poison, <32 x i8> %res0, 0 + %res4 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %res3, <32 x i8> %res1, 1 + %res5 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %res4, <32 x i8> %res2, 2 + ret { <32 x i8>, <32 x i8>, <32 x i8> } %res5 } declare <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16, <16 x i16>, i16) -define <16 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_256(i16 %x0, <16 x i16> %x1, i16 %mask) { +define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_pbroadcast_w_gpr_256(i16 %x0, <16 x i16> %x1, i16 %mask) { ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256: ; X86: # %bb.0: -; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0x4c,0x24,0x04] +; X86-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0x5c,0x24,0x04] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x6f,0xc1] -; X86-NEXT: vmovdqu16 %ymm1, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0xd1] -; X86-NEXT: vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] -; X86-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] +; X86-NEXT: vpblendmw %ymm3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x66,0xcb] +; X86-NEXT: vmovdqu16 %ymm3, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0xd3] +; X86-NEXT: vmovdqa 
%ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256: ; X64: # %bb.0: -; X64-NEXT: vpbroadcastw %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xcf] +; X64-NEXT: vmovdqa %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc8] +; X64-NEXT: vpbroadcastw %edi, %ymm0 # encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xc7] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpbroadcastw %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7] +; X64-NEXT: vpbroadcastw %edi, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xcf] ; X64-NEXT: vpbroadcastw %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xd7] -; X64-NEXT: vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2] -; X64-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 -1) %res1 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 %mask) %res2 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> zeroinitializer, i16 %mask) - %res3 = add <16 x i16> %res, %res1 - %res4 = add <16 x i16> %res2, %res3 - ret <16 x i16> %res4 + %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res, 0 + %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1 + %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2 + ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5 } declare <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8>, <32 x i8>, i32) @@ -498,7 +494,7 @@ declare <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8*, <8 x i16>, i8) -define <8 x i16>@test_int_x86_avx512_mask_loadu_w_128(i8* %ptr, i8* %ptr2, <8 x i16> %x1, i8 
%mask) { +define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_loadu_w_128(i8* %ptr, i8* %ptr2, <8 x i16> %x1, i8 %mask) { ; X86-LABEL: test_int_x86_avx512_mask_loadu_w_128: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] @@ -506,111 +502,111 @@ ; X86-NEXT: vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c] ; X86-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] -; X86-NEXT: vmovdqu16 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0x00] -; X86-NEXT: vmovdqu16 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0x09] -; X86-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1] +; X86-NEXT: vpblendmw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x66,0x08] +; X86-NEXT: vmovdqu16 (%ecx), %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0x11] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_loadu_w_128: ; X64: # %bb.0: ; X64-NEXT: vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07] ; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] -; X64-NEXT: vmovdqu16 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0x06] -; X64-NEXT: vmovdqu16 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0x0f] -; X64-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1] +; X64-NEXT: vpblendmw (%rsi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x66,0x0e] +; X64-NEXT: vmovdqu16 (%rdi), %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0x17] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8* %ptr, <8 x i16> %x1, i8 -1) - %res = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8* %ptr2, <8 x i16> %res0, i8 %mask) - %res1 = call <8 x i16> 
@llvm.x86.avx512.mask.loadu.w.128(i8* %ptr, <8 x i16> zeroinitializer, i8 %mask) - %res2 = add <8 x i16> %res, %res1 - ret <8 x i16> %res2 + %res1 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8* %ptr2, <8 x i16> %res0, i8 %mask) + %res2 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8* %ptr, <8 x i16> zeroinitializer, i8 %mask) + %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0 + %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1 + %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2 + ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5 } declare <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8*, <16 x i16>, i16) -define <16 x i16>@test_int_x86_avx512_mask_loadu_w_256(i8* %ptr, i8* %ptr2, <16 x i16> %x1, i16 %mask) { +define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_loadu_w_256(i8* %ptr, i8* %ptr2, <16 x i16> %x1, i16 %mask) { ; X86-LABEL: test_int_x86_avx512_mask_loadu_w_256: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] ; X86-NEXT: vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c] -; X86-NEXT: vmovdqu16 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x6f,0x00] -; X86-NEXT: vmovdqu16 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x09] -; X86-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1] +; X86-NEXT: vpblendmw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x66,0x08] +; X86-NEXT: vmovdqu16 (%ecx), %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x11] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_loadu_w_256: ; X64: # %bb.0: ; X64-NEXT: vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xfe,0x6f,0x07] ; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] -; X64-NEXT: vmovdqu16 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x6f,0x06] -; X64-NEXT: vmovdqu16 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x0f] -; X64-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1] +; X64-NEXT: vpblendmw (%rsi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x66,0x0e] +; X64-NEXT: vmovdqu16 (%rdi), %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x17] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr, <16 x i16> %x1, i16 -1) - %res = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr2, <16 x i16> %res0, i16 %mask) - %res1 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr, <16 x i16> zeroinitializer, i16 %mask) - %res2 = add <16 x i16> %res, %res1 - ret <16 x i16> %res2 + %res1 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr2, <16 x i16> %res0, i16 %mask) + %res2 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr, <16 x i16> zeroinitializer, i16 %mask) + %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0 + %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1 + %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2 + ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5 } declare <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8*, <16 x i8>, i16) -define <16 x i8>@test_int_x86_avx512_mask_loadu_b_128(i8* %ptr, i8* %ptr2, <16 x i8> %x1, i16 %mask) { +define { <16 x i8>, <16 x i8>, <16 x i8> } @test_int_x86_avx512_mask_loadu_b_128(i8* %ptr, i8* %ptr2, <16 x i8> %x1, i16 %mask) { ; X86-LABEL: test_int_x86_avx512_mask_loadu_b_128: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] ; 
X86-NEXT: vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c] -; X86-NEXT: vmovdqu8 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x6f,0x00] -; X86-NEXT: vmovdqu8 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x09] -; X86-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] +; X86-NEXT: vpblendmb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x66,0x08] +; X86-NEXT: vmovdqu8 (%ecx), %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x11] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_loadu_b_128: ; X64: # %bb.0: ; X64-NEXT: vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07] ; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] -; X64-NEXT: vmovdqu8 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x6f,0x06] -; X64-NEXT: vmovdqu8 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x0f] -; X64-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1] +; X64-NEXT: vpblendmb (%rsi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x66,0x0e] +; X64-NEXT: vmovdqu8 (%rdi), %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x17] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr, <16 x i8> %x1, i16 -1) - %res = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr2, <16 x i8> %res0, i16 %mask) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr, <16 x i8> zeroinitializer, i16 %mask) - %res2 = add <16 x i8> %res, %res1 - ret <16 x i8> %res2 + %res1 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr2, <16 x i8> %res0, i16 %mask) + %res2 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr, <16 x i8> zeroinitializer, i16 %mask) + %res3 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } 
poison, <16 x i8> %res0, 0 + %res4 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res3, <16 x i8> %res1, 1 + %res5 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res4, <16 x i8> %res2, 2 + ret { <16 x i8>, <16 x i8>, <16 x i8> } %res5 } declare <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8*, <32 x i8>, i32) -define <32 x i8>@test_int_x86_avx512_mask_loadu_b_256(i8* %ptr, i8* %ptr2, <32 x i8> %x1, i32 %mask) { +define { <32 x i8>, <32 x i8>, <32 x i8> } @test_int_x86_avx512_mask_loadu_b_256(i8* %ptr, i8* %ptr2, <32 x i8> %x1, i32 %mask) { ; X86-LABEL: test_int_x86_avx512_mask_loadu_b_256: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] ; X86-NEXT: vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01] ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c] -; X86-NEXT: vmovdqu8 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x6f,0x00] -; X86-NEXT: vmovdqu8 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x09] -; X86-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc1] +; X86-NEXT: vpblendmb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x66,0x08] +; X86-NEXT: vmovdqu8 (%ecx), %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x11] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_loadu_b_256: ; X64: # %bb.0: ; X64-NEXT: vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07] ; X64-NEXT: kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca] -; X64-NEXT: vmovdqu8 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x6f,0x06] -; X64-NEXT: vmovdqu8 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x0f] -; X64-NEXT: vpaddb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc1] +; X64-NEXT: vpblendmb (%rsi), %ymm0, %ymm1 {%k1} # 
encoding: [0x62,0xf2,0x7d,0x29,0x66,0x0e] +; X64-NEXT: vmovdqu8 (%rdi), %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x17] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr, <32 x i8> %x1, i32 -1) - %res = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr2, <32 x i8> %res0, i32 %mask) - %res1 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr, <32 x i8> zeroinitializer, i32 %mask) - %res2 = add <32 x i8> %res, %res1 - ret <32 x i8> %res2 + %res1 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr2, <32 x i8> %res0, i32 %mask) + %res2 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr, <32 x i8> zeroinitializer, i32 %mask) + %res3 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } poison, <32 x i8> %res0, 0 + %res4 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %res3, <32 x i8> %res1, 1 + %res5 = insertvalue { <32 x i8>, <32 x i8>, <32 x i8> } %res4, <32 x i8> %res2, 2 + ret { <32 x i8>, <32 x i8>, <32 x i8> } %res5 } declare <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8>, <16 x i8>, i32, <16 x i8>, i16) @@ -3151,185 +3147,191 @@ declare <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16>, i32, <8 x i16>, i8) -define <8 x i16>@test_int_x86_avx512_mask_psrl_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { +define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_psrl_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_128: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: vpsrlw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xd0,0x03] -; X86-NEXT: vpsrlw $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xd0,0x04] -; X86-NEXT: vpsrlw $5, %xmm0, %xmm0 
{%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x71,0xd0,0x05] -; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] -; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] +; X86-NEXT: vpsrlw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xd0,0x03] +; X86-NEXT: vpsrlw $4, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x71,0xd0,0x04] +; X86-NEXT: vpsrlw $5, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xd0,0x05] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_128: ; X64: # %bb.0: +; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsrlw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xd0,0x03] -; X64-NEXT: vpsrlw $4, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xd0,0x04] -; X64-NEXT: vpsrlw $5, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x71,0xd0,0x05] -; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] -; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] +; X64-NEXT: vpsrlw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xd0,0x03] +; X64-NEXT: vpsrlw $4, %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x71,0xd0,0x04] +; X64-NEXT: vpsrlw $5, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xd0,0x05] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) + %res0 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 
%x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 4, <8 x i16> %x2, i8 -1) %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 5, <8 x i16> zeroinitializer, i8 %x3) - %res3 = add <8 x i16> %res, %res1 - %res4 = add <8 x i16> %res2, %res3 - ret <8 x i16> %res4 + %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0 + %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1 + %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2 + ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5 } declare <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16>, i32, <16 x i16>, i16) -define <16 x i16>@test_int_x86_avx512_mask_psrl_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { +define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_psrl_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_256: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpsrlw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xd0,0x03] -; X86-NEXT: vpsrlw $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xd0,0x04] -; X86-NEXT: vpsrlw $5, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xd0,0x05] -; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] -; X86-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] +; X86-NEXT: vpsrlw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xd0,0x03] +; X86-NEXT: vpsrlw $4, %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x71,0xd0,0x04] +; X86-NEXT: vpsrlw $5, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xd0,0x05] +; X86-NEXT: vmovdqa %ymm3, %ymm0 
# EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_256: ; X64: # %bb.0: +; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsrlw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xd0,0x03] -; X64-NEXT: vpsrlw $4, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xd0,0x04] -; X64-NEXT: vpsrlw $5, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xd0,0x05] -; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] -; X64-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] +; X64-NEXT: vpsrlw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xd0,0x03] +; X64-NEXT: vpsrlw $4, %ymm0, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x71,0xd0,0x04] +; X64-NEXT: vpsrlw $5, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xd0,0x05] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) + %res0 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 4, <16 x i16> %x2, i16 -1) %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 5, <16 x i16> zeroinitializer, i16 %x3) - %res3 = add <16 x i16> %res, %res1 - %res4 = add <16 x i16> %res3, %res2 - ret <16 x i16> %res4 + %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res0, 0 + %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1 + %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, 
<16 x i16> %res2, 2 + ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5 } declare <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16>, i32, <8 x i16>, i8) -define <8 x i16>@test_int_x86_avx512_mask_psra_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { +define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_psra_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_psra_wi_128: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: vpsraw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xe0,0x03] -; X86-NEXT: vpsraw $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xe0,0x04] -; X86-NEXT: vpsraw $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x05] -; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] -; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] +; X86-NEXT: vpsraw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xe0,0x03] +; X86-NEXT: vpsraw $4, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0x89,0x71,0xe0,0x04] +; X86-NEXT: vpsraw $5, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xe0,0x05] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psra_wi_128: ; X64: # %bb.0: +; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsraw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xe0,0x03] -; X64-NEXT: vpsraw $4, %xmm0, %xmm2 {%k1} {z} # encoding: 
[0x62,0xf1,0x6d,0x89,0x71,0xe0,0x04] -; X64-NEXT: vpsraw $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x05] -; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] -; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] +; X64-NEXT: vpsraw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xe0,0x03] +; X64-NEXT: vpsraw $4, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0x89,0x71,0xe0,0x04] +; X64-NEXT: vpsraw $5, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xe0,0x05] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) + %res0 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 4, <8 x i16> zeroinitializer, i8 %x3) %res2 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 5, <8 x i16> %x2, i8 -1) - %res3 = add <8 x i16> %res, %res1 - %res4 = add <8 x i16> %res3, %res2 - ret <8 x i16> %res4 + %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0 + %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1 + %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2 + ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5 } declare <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16>, i32, <16 x i16>, i16) -define <16 x i16>@test_int_x86_avx512_mask_psra_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { +define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_psra_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_psra_wi_256: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %ymm1, 
%ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpsraw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xe0,0x03] -; X86-NEXT: vpsraw $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xe0,0x04] -; X86-NEXT: vpsraw $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xe0,0x05] -; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] -; X86-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] +; X86-NEXT: vpsraw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xe0,0x03] +; X86-NEXT: vpsraw $4, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xa9,0x71,0xe0,0x04] +; X86-NEXT: vpsraw $5, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xe0,0x05] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psra_wi_256: ; X64: # %bb.0: +; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsraw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xe0,0x03] -; X64-NEXT: vpsraw $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xe0,0x04] -; X64-NEXT: vpsraw $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xe0,0x05] -; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] -; X64-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] +; X64-NEXT: vpsraw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xe0,0x03] +; X64-NEXT: vpsraw $4, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xa9,0x71,0xe0,0x04] +; X64-NEXT: vpsraw $5, %ymm0, %ymm2 # EVEX TO VEX 
Compression encoding: [0xc5,0xed,0x71,0xe0,0x05] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 4, <16 x i16> zeroinitializer, i16 %x3) %res2 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 5, <16 x i16> %x2, i16 -1) - %res3 = add <16 x i16> %res, %res1 - %res4 = add <16 x i16> %res3, %res2 - ret <16 x i16> %res4 + %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res, 0 + %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1 + %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2 + ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5 } declare <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16>, i32, <8 x i16>, i8) -define <8 x i16>@test_int_x86_avx512_mask_psll_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { +define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_psll_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_psll_wi_128: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: vpsllw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xf0,0x03] -; X86-NEXT: vpsllw $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xf0,0x04] -; X86-NEXT: vpsllw $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x05] -; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] -; X86-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression 
encoding: [0xc5,0xf1,0xfd,0xc0] +; X86-NEXT: vpsllw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xf0,0x03] +; X86-NEXT: vpsllw $4, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0x89,0x71,0xf0,0x04] +; X86-NEXT: vpsllw $5, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf0,0x05] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psll_wi_128: ; X64: # %bb.0: +; X64-NEXT: vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsllw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xf0,0x03] -; X64-NEXT: vpsllw $4, %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0x89,0x71,0xf0,0x04] -; X64-NEXT: vpsllw $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x05] -; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] -; X64-NEXT: vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0] +; X64-NEXT: vpsllw $3, %xmm0, %xmm3 {%k1} # encoding: [0x62,0xf1,0x65,0x09,0x71,0xf0,0x03] +; X64-NEXT: vpsllw $4, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0x89,0x71,0xf0,0x04] +; X64-NEXT: vpsllw $5, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf0,0x05] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) + %res0 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3) %res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 4, <8 x i16> zeroinitializer, i8 %x3) %res2 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 5, <8 x i16> %x2, i8 -1) - 
%res3 = add <8 x i16> %res, %res1 - %res4 = add <8 x i16> %res3, %res2 - ret <8 x i16> %res4 + %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0 + %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1 + %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2 + ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5 } declare <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16>, i32, <16 x i16>, i16) -define <16 x i16>@test_int_x86_avx512_mask_psll_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { +define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_psll_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_psll_wi_256: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpsllw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xf0,0x03] -; X86-NEXT: vpsllw $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xf0,0x04] -; X86-NEXT: vpsllw $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xf0,0x05] -; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] -; X86-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] +; X86-NEXT: vpsllw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xf0,0x03] +; X86-NEXT: vpsllw $4, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xa9,0x71,0xf0,0x04] +; X86-NEXT: vpsllw $5, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xf0,0x05] +; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_psll_wi_256: ; X64: # %bb.0: +; X64-NEXT: vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression 
encoding: [0xc5,0xfd,0x6f,0xd9] ; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsllw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xf0,0x03] -; X64-NEXT: vpsllw $4, %ymm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xf0,0x04] -; X64-NEXT: vpsllw $5, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xf0,0x05] -; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] -; X64-NEXT: vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0] +; X64-NEXT: vpsllw $3, %ymm0, %ymm3 {%k1} # encoding: [0x62,0xf1,0x65,0x29,0x71,0xf0,0x03] +; X64-NEXT: vpsllw $4, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x75,0xa9,0x71,0xf0,0x04] +; X64-NEXT: vpsllw $5, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xf0,0x05] +; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3) %res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 4, <16 x i16> zeroinitializer, i16 %x3) %res2 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 5, <16 x i16> %x2, i16 -1) - %res3 = add <16 x i16> %res, %res1 - %res4 = add <16 x i16> %res3, %res2 - ret <16 x i16> %res4 + %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res, 0 + %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1 + %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2 + ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5 } declare <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) @@ -6925,63 +6927,69 @@ declare <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8>, <16 x i8>, i32, <8 x i16>, i8) -define <8 x 
i16>@test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) { +define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) { ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_128: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02] +; X86-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xe1,0x02] ; X86-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03] -; X86-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04] -; X86-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] -; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] +; X86-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm2 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd1,0x04] +; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] +; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_128: ; X64: # %bb.0: +; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02] +; X64-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xe1,0x02] ; X64-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03] -; 
X64-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04] -; X64-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] -; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] +; X64-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm2 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd1,0x04] +; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] +; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] - %res = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 %x4) + %res0 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 %x4) %res1 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 3, <8 x i16> zeroinitializer, i8 %x4) %res2 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 4, <8 x i16> %x3, i8 -1) - %res3 = add <8 x i16> %res, %res1 - %res4 = add <8 x i16> %res2, %res3 - ret <8 x i16> %res4 + %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %res0, 0 + %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %res1, 1 + %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %res2, 2 + ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5 } declare <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8>, <32 x i8>, i32, <16 x i16>, i16) -define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) { +define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) { ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_256: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: 
[0xc5,0xfd,0x6f,0xe2] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] -; X86-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02] +; X86-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xe1,0x02] ; X86-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03] -; X86-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04] -; X86-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] -; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] +; X86-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm2 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd1,0x04] +; X86-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4] +; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_256: ; X64: # %bb.0: +; X64-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2] ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02] +; X64-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xe1,0x02] ; X64-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03] -; X64-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04] -; X64-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] -; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] +; X64-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm2 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd1,0x04] +; X64-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX 
Compression encoding: [0xc5,0xfd,0x6f,0xc4] +; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %res = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 %x4) %res1 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 3, <16 x i16> zeroinitializer, i16 %x4) %res2 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 4, <16 x i16> %x3, i16 -1) - %res3 = add <16 x i16> %res, %res1 - %res4 = add <16 x i16> %res3, %res2 - ret <16 x i16> %res4 + %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %res, 0 + %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %res1, 1 + %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %res2, 2 + ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5 } define <8 x i16> @test_mask_adds_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) { diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -1512,33 +1512,32 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16>, <16 x i8>, i8) -define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { +define { <16 x i8>, <16 x i8>, <16 x i8> } @test_int_x86_avx512_mask_pmov_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_128: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: vpmovwb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc2] +; X86-NEXT: vpmovwb %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc3] ; X86-NEXT: vpmovwb %xmm0, %xmm1 {%k1} # encoding: 
[0x62,0xf2,0x7e,0x09,0x30,0xc1] -; X86-NEXT: vpmovwb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc0] -; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] -; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] +; X86-NEXT: vpmovwb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc2] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_128: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpmovwb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc2] +; X64-NEXT: vpmovwb %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc3] ; X64-NEXT: vpmovwb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1] -; X64-NEXT: vpmovwb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc0] -; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] -; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] +; X64-NEXT: vpmovwb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc2] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 + %res3 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } poison, <16 x i8> %res0, 0 + %res4 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res3, <16 x i8> %res1, 1 + %res5 = insertvalue { <16 x i8>, 
<16 x i8>, <16 x i8> } %res4, <16 x i8> %res2, 2 + ret { <16 x i8>, <16 x i8>, <16 x i8> } %res5 } declare void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16>, i8) @@ -1566,33 +1565,32 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16>, <16 x i8>, i8) -define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { +define { <16 x i8>, <16 x i8>, <16 x i8> } @test_int_x86_avx512_mask_pmovs_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_128: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: vpmovswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc2] +; X86-NEXT: vpmovswb %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc3] ; X86-NEXT: vpmovswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1] -; X86-NEXT: vpmovswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc0] -; X86-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] -; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] +; X86-NEXT: vpmovswb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc2] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_128: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpmovswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc2] +; X64-NEXT: vpmovswb %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc3] ; X64-NEXT: vpmovswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1] -; X64-NEXT: vpmovswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc0] -; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xf1,0xfc,0xc0] -; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] +; X64-NEXT: vpmovswb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc2] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 + %res3 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } poison, <16 x i8> %res0, 0 + %res4 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res3, <16 x i8> %res1, 1 + %res5 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res4, <16 x i8> %res2, 2 + ret { <16 x i8>, <16 x i8>, <16 x i8> } %res5 } declare void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16>, i8) @@ -1620,33 +1618,32 @@ declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16>, <16 x i8>, i8) -define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { +define { <16 x i8>, <16 x i8>, <16 x i8> } @test_int_x86_avx512_mask_pmovus_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) { ; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_128: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: vpmovuswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc2] +; X86-NEXT: vpmovuswb %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc3] ; X86-NEXT: vpmovuswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1] -; X86-NEXT: vpmovuswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc0] -; X86-NEXT: 
vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] -; X86-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] +; X86-NEXT: vpmovuswb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc2] +; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_128: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpmovuswb %xmm0, %xmm2 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc2] +; X64-NEXT: vpmovuswb %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc3] ; X64-NEXT: vpmovuswb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1] -; X64-NEXT: vpmovuswb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc0] -; X64-NEXT: vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0] -; X64-NEXT: vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0] +; X64-NEXT: vpmovuswb %xmm0, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc2] +; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] ; X64-NEXT: retq # encoding: [0xc3] %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1) %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2) - %res3 = add <16 x i8> %res0, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 + %res3 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } poison, <16 x i8> %res0, 0 + %res4 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res3, <16 x i8> %res1, 1 + %res5 = insertvalue { <16 x i8>, <16 x i8>, <16 x i8> } %res4, <16 x i8> %res2, 2 + ret { <16 x i8>, <16 x i8>, <16 x i8> } %res5 } declare void 
@llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16>, i8) @@ -1990,26 +1987,28 @@ declare <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8>, <16 x i8>, i32) -define <8 x i16>@test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) { +define { <8 x i16>, <8 x i16>, <8 x i16> } @test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) { ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_128: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] ; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02] +; X86-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xe1,0x02] ; X86-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03] -; X86-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04] -; X86-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] -; X86-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] +; X86-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm2 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd1,0x04] +; X86-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] +; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_128: ; X64: # %bb.0: +; X64-NEXT: vmovdqa %xmm2, %xmm4 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xe2] ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02] +; X64-NEXT: vdbpsadbw $2, %xmm1, %xmm0, 
%xmm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xe1,0x02] ; X64-NEXT: vdbpsadbw $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x03] -; X64-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x04] -; X64-NEXT: vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0] -; X64-NEXT: vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0] +; X64-NEXT: vdbpsadbw $4, %xmm1, %xmm0, %xmm2 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd1,0x04] +; X64-NEXT: vmovdqa %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc4] +; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %1 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2) %2 = bitcast i8 %x4 to <8 x i1> @@ -2018,32 +2017,35 @@ %5 = bitcast i8 %x4 to <8 x i1> %6 = select <8 x i1> %5, <8 x i16> %4, <8 x i16> zeroinitializer %7 = call <8 x i16> @llvm.x86.avx512.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 4) - %res3 = add <8 x i16> %3, %6 - %res4 = add <8 x i16> %7, %res3 - ret <8 x i16> %res4 + %res3 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } poison, <8 x i16> %3, 0 + %res4 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res3, <8 x i16> %6, 1 + %res5 = insertvalue { <8 x i16>, <8 x i16>, <8 x i16> } %res4, <8 x i16> %7, 2 + ret { <8 x i16>, <8 x i16>, <8 x i16> } %res5 } declare <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8>, <32 x i8>, i32) -define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) { +define { <16 x i16>, <16 x i16>, <16 x i16> } @test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) { ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_256: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2] ; X86-NEXT: kmovw 
{{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] -; X86-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02] +; X86-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xe1,0x02] ; X86-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03] -; X86-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04] -; X86-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] -; X86-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] +; X86-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm2 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd1,0x04] +; X86-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4] +; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_256: ; X64: # %bb.0: +; X64-NEXT: vmovdqa %ymm2, %ymm4 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xe2] ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02] +; X64-NEXT: vdbpsadbw $2, %ymm1, %ymm0, %ymm4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xe1,0x02] ; X64-NEXT: vdbpsadbw $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x03] -; X64-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm0 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x04] -; X64-NEXT: vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0] -; X64-NEXT: vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0] +; X64-NEXT: vdbpsadbw $4, %ymm1, %ymm0, %ymm2 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd1,0x04] +; X64-NEXT: vmovdqa %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc4] +; 
X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %1 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2) %2 = bitcast i16 %x4 to <16 x i1> @@ -2052,9 +2054,10 @@ %5 = bitcast i16 %x4 to <16 x i1> %6 = select <16 x i1> %5, <16 x i16> %4, <16 x i16> zeroinitializer %7 = call <16 x i16> @llvm.x86.avx512.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 4) - %res3 = add <16 x i16> %3, %6 - %res4 = add <16 x i16> %res3, %7 - ret <16 x i16> %res4 + %res3 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } poison, <16 x i16> %3, 0 + %res4 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res3, <16 x i16> %6, 1 + %res5 = insertvalue { <16 x i16>, <16 x i16>, <16 x i16> } %res4, <16 x i16> %7, 2 + ret { <16 x i16>, <16 x i16>, <16 x i16> } %res5 } declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16) diff --git a/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics-upgrade.ll --- a/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics-upgrade.ll @@ -14,31 +14,32 @@ ret <8 x i32> %res } -define <8 x i32>@test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { +define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_256: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] -; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] -; 
X86-NEXT: vpdpbusd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x50,0x18] -; X86-NEXT: vpdpbusd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xc2] -; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X86-NEXT: vpdpbusd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x50,0x00] +; X86-NEXT: vpdpbusd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xda] +; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusd_256: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] -; X64-NEXT: vpdpbusd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x50,0x1f] -; X64-NEXT: vpdpbusd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xc2] -; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpbusd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x50,0x07] +; X64-NEXT: vpdpbusd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xda] +; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i32>, <8 x i32>* %x2p - %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) + %res0 = call <8 x i32> @llvm.x86.avx512.mask.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3) - %res2 = add <8 x i32> %res, %res1 - ret <8 x i32> %res2 + %res2 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %res0, 0 
+ %res3 = insertvalue { <8 x i32>, <8 x i32> } %res2, <8 x i32> %res1, 1 + ret { <8 x i32>, <8 x i32> } %res3 } declare <4 x i32> @llvm.x86.avx512.mask.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) @@ -53,31 +54,32 @@ ret <4 x i32> %res } -define <4 x i32>@test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { +define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_128: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] -; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] -; X86-NEXT: vpdpbusd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x50,0x18] -; X86-NEXT: vpdpbusd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x50,0xc2] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X86-NEXT: vpdpbusd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x50,0x00] +; X86-NEXT: vpdpbusd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x50,0xda] +; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusd_128: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] -; X64-NEXT: vpdpbusd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x50,0x1f] -; X64-NEXT: vpdpbusd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: 
[0x62,0xf2,0x75,0x89,0x50,0xc2] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpbusd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x50,0x07] +; X64-NEXT: vpdpbusd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x50,0xda] +; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i32>, <4 x i32>* %x2p - %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) + %res0 = call <4 x i32> @llvm.x86.avx512.mask.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3) - %res2 = add <4 x i32> %res, %res1 - ret <4 x i32> %res2 + %res2 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %res0, 0 + %res3 = insertvalue { <4 x i32>, <4 x i32> } %res2, <4 x i32> %res1, 1 + ret { <4 x i32>, <4 x i32> } %res3 } declare <8 x i32> @llvm.x86.avx512.mask.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) @@ -92,31 +94,32 @@ ret <8 x i32> %res } -define <8 x i32>@test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { +define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_256: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] -; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: 
[0xc5,0xfd,0x6f,0xd8] -; X86-NEXT: vpdpbusds (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x51,0x18] -; X86-NEXT: vpdpbusds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xc2] -; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X86-NEXT: vpdpbusds (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x51,0x00] +; X86-NEXT: vpdpbusds %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xda] +; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusds_256: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] -; X64-NEXT: vpdpbusds (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x51,0x1f] -; X64-NEXT: vpdpbusds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xc2] -; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpbusds (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x51,0x07] +; X64-NEXT: vpdpbusds %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xda] +; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i32>, <8 x i32>* %x2p - %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) + %res0 = call <8 x i32> @llvm.x86.avx512.mask.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3) - %res2 = add <8 x i32> %res, %res1 - ret <8 x i32> %res2 + %res2 = insertvalue { <8 x i32>, <8 
x i32> } poison, <8 x i32> %res0, 0 + %res3 = insertvalue { <8 x i32>, <8 x i32> } %res2, <8 x i32> %res1, 1 + ret { <8 x i32>, <8 x i32> } %res3 } declare <4 x i32> @llvm.x86.avx512.mask.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) @@ -131,31 +134,32 @@ ret <4 x i32> %res } -define <4 x i32>@test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { +define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_128: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] -; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] -; X86-NEXT: vpdpbusds (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x51,0x18] -; X86-NEXT: vpdpbusds %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xc2] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X86-NEXT: vpdpbusds (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x51,0x00] +; X86-NEXT: vpdpbusds %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xda] +; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusds_128: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] -; X64-NEXT: vpdpbusds (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x51,0x1f] -; X64-NEXT: vpdpbusds %xmm2, %xmm1, %xmm0 
{%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xc2] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpbusds (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x51,0x07] +; X64-NEXT: vpdpbusds %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xda] +; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i32>, <4 x i32>* %x2p - %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) + %res0 = call <4 x i32> @llvm.x86.avx512.mask.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3) - %res2 = add <4 x i32> %res, %res1 - ret <4 x i32> %res2 + %res2 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %res0, 0 + %res3 = insertvalue { <4 x i32>, <4 x i32> } %res2, <4 x i32> %res1, 1 + ret { <4 x i32>, <4 x i32> } %res3 } declare <8 x i32> @llvm.x86.avx512.mask.vpdpwssd.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) @@ -170,31 +174,32 @@ ret <8 x i32> %res } -define <8 x i32>@test_int_x86_avx512_mask_vpdpwssd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { +define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpwssd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpwssd_256: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] -; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression 
encoding: [0xc5,0xfd,0x6f,0xd8] -; X86-NEXT: vpdpwssd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x52,0x18] -; X86-NEXT: vpdpwssd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x52,0xc2] -; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X86-NEXT: vpdpwssd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x52,0x00] +; X86-NEXT: vpdpwssd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x52,0xda] +; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssd_256: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] -; X64-NEXT: vpdpwssd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x52,0x1f] -; X64-NEXT: vpdpwssd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x52,0xc2] -; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpwssd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x52,0x07] +; X64-NEXT: vpdpwssd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x52,0xda] +; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i32>, <8 x i32>* %x2p - %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) + %res0 = call <8 x i32> @llvm.x86.avx512.mask.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3) - %res2 = add <8 x i32> %res, %res1 - ret <8 x i32> %res2 + %res2 = insertvalue { <8 x i32>, <8 x 
i32> } poison, <8 x i32> %res0, 0 + %res3 = insertvalue { <8 x i32>, <8 x i32> } %res2, <8 x i32> %res1, 1 + ret { <8 x i32>, <8 x i32> } %res3 } declare <4 x i32> @llvm.x86.avx512.mask.vpdpwssd.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) @@ -209,31 +214,32 @@ ret <4 x i32> %res } -define <4 x i32>@test_int_x86_avx512_mask_vpdpwssd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { +define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpwssd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpwssd_128: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] -; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] -; X86-NEXT: vpdpwssd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x52,0x18] -; X86-NEXT: vpdpwssd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x52,0xc2] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X86-NEXT: vpdpwssd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x52,0x00] +; X86-NEXT: vpdpwssd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x52,0xda] +; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssd_128: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] -; X64-NEXT: vpdpwssd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x52,0x1f] -; X64-NEXT: vpdpwssd %xmm2, %xmm1, %xmm0 {%k1} {z} # 
encoding: [0x62,0xf2,0x75,0x89,0x52,0xc2] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpwssd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x52,0x07] +; X64-NEXT: vpdpwssd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x52,0xda] +; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i32>, <4 x i32>* %x2p - %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) + %res0 = call <4 x i32> @llvm.x86.avx512.mask.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3) - %res2 = add <4 x i32> %res, %res1 - ret <4 x i32> %res2 + %res2 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %res0, 0 + %res3 = insertvalue { <4 x i32>, <4 x i32> } %res2, <4 x i32> %res1, 1 + ret { <4 x i32>, <4 x i32> } %res3 } @@ -249,31 +255,32 @@ ret <8 x i32> %res } -define <8 x i32>@test_int_x86_avx512_mask_vpdpwssds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { +define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpwssds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpwssds_256: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] -; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] -; X86-NEXT: vpdpwssds (%eax), %ymm1, %ymm3 {%k1} # encoding: 
[0x62,0xf2,0x75,0x29,0x53,0x18] -; X86-NEXT: vpdpwssds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x53,0xc2] -; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X86-NEXT: vpdpwssds (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x53,0x00] +; X86-NEXT: vpdpwssds %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x53,0xda] +; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssds_256: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] -; X64-NEXT: vpdpwssds (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x53,0x1f] -; X64-NEXT: vpdpwssds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x53,0xc2] -; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpwssds (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x53,0x07] +; X64-NEXT: vpdpwssds %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x53,0xda] +; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i32>, <8 x i32>* %x2p - %res = call <8 x i32> @llvm.x86.avx512.mask.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) + %res0 = call <8 x i32> @llvm.x86.avx512.mask.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4, i8 %x3) - %res2 = add <8 x i32> %res, %res1 - ret <8 x i32> %res2 + %res2 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %res0, 0 + %res3 = insertvalue { <8 x i32>, <8 x i32> } 
%res2, <8 x i32> %res1, 1 + ret { <8 x i32>, <8 x i32> } %res3 } declare <4 x i32> @llvm.x86.avx512.mask.vpdpwssds.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) @@ -288,29 +295,30 @@ ret <4 x i32> %res } -define <4 x i32>@test_int_x86_avx512_mask_vpdpwssds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { +define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpwssds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpwssds_128: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] -; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] -; X86-NEXT: vpdpwssds (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x53,0x18] -; X86-NEXT: vpdpwssds %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x53,0xc2] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X86-NEXT: vpdpwssds (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x53,0x00] +; X86-NEXT: vpdpwssds %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x53,0xda] +; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssds_128: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] -; X64-NEXT: vpdpwssds (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x53,0x1f] -; X64-NEXT: vpdpwssds %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x53,0xc2] -; X64-NEXT: vpaddd %xmm0, 
%xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpwssds (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x53,0x07] +; X64-NEXT: vpdpwssds %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x53,0xda] +; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i32>, <4 x i32>* %x2p - %res = call <4 x i32> @llvm.x86.avx512.mask.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) + %res0 = call <4 x i32> @llvm.x86.avx512.mask.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x4, i8 %x3) - %res2 = add <4 x i32> %res, %res1 - ret <4 x i32> %res2 + %res2 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %res0, 0 + %res3 = insertvalue { <4 x i32>, <4 x i32> } %res2, <4 x i32> %res1, 1 + ret { <4 x i32>, <4 x i32> } %res3 } diff --git a/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll --- a/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl_vnni-intrinsics.ll @@ -13,25 +13,25 @@ ret <8 x i32> %1 } -define <8 x i32>@test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { +define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_256: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] -; 
X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] -; X86-NEXT: vpdpbusd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x50,0x18] -; X86-NEXT: vpdpbusd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xc2] -; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X86-NEXT: vpdpbusd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x50,0x00] +; X86-NEXT: vpdpbusd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xda] +; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusd_256: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] -; X64-NEXT: vpdpbusd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x50,0x1f] -; X64-NEXT: vpdpbusd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xc2] -; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpbusd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x50,0x07] +; X64-NEXT: vpdpbusd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x50,0xda] +; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i32>, <8 x i32>* %x2p %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) @@ -40,8 +40,9 @@ %4 = call <8 x i32> @llvm.x86.avx512.vpdpbusd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4) %5 = bitcast i8 %x3 to <8 x i1> %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer - %res3 = add <8 x i32> %3, %6 - ret <8 x i32> %res3 + %res1 = 
insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0 + %res2 = insertvalue { <8 x i32>, <8 x i32> } %res1, <8 x i32> %6, 1 + ret { <8 x i32>, <8 x i32> } %res2 } declare <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32>, <4 x i32>, <4 x i32>) @@ -55,25 +56,25 @@ ret <4 x i32> %1 } -define <4 x i32>@test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { +define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_128: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] -; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] -; X86-NEXT: vpdpbusd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x50,0x18] -; X86-NEXT: vpdpbusd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x50,0xc2] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X86-NEXT: vpdpbusd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x50,0x00] +; X86-NEXT: vpdpbusd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x50,0xda] +; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusd_128: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] -; X64-NEXT: vpdpbusd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x50,0x1f] -; X64-NEXT: vpdpbusd %xmm2, %xmm1, %xmm0 {%k1} 
{z} # encoding: [0x62,0xf2,0x75,0x89,0x50,0xc2] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpbusd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x50,0x07] +; X64-NEXT: vpdpbusd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x50,0xda] +; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i32>, <4 x i32>* %x2p %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) @@ -84,8 +85,9 @@ %5 = bitcast i8 %x3 to <8 x i1> %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> %6 = select <4 x i1> %extract1, <4 x i32> %4, <4 x i32> zeroinitializer - %res3 = add <4 x i32> %3, %6 - ret <4 x i32> %res3 + %res1 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0 + %res2 = insertvalue { <4 x i32>, <4 x i32> } %res1, <4 x i32> %6, 1 + ret { <4 x i32>, <4 x i32> } %res2 } declare <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32>, <8 x i32>, <8 x i32>) @@ -99,25 +101,25 @@ ret <8 x i32> %1 } -define <8 x i32>@test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { +define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpbusds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_256: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] -; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] -; X86-NEXT: vpdpbusds (%eax), %ymm1, %ymm3 {%k1} # 
encoding: [0x62,0xf2,0x75,0x29,0x51,0x18] -; X86-NEXT: vpdpbusds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xc2] -; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X86-NEXT: vpdpbusds (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x51,0x00] +; X86-NEXT: vpdpbusds %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xda] +; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusds_256: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] -; X64-NEXT: vpdpbusds (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x51,0x1f] -; X64-NEXT: vpdpbusds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xc2] -; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpbusds (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x51,0x07] +; X64-NEXT: vpdpbusds %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x51,0xda] +; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i32>, <8 x i32>* %x2p %1 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) @@ -126,8 +128,9 @@ %4 = call <8 x i32> @llvm.x86.avx512.vpdpbusds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4) %5 = bitcast i8 %x3 to <8 x i1> %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer - %res3 = add <8 x i32> %3, %6 - ret <8 x i32> %res3 + %res1 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0 + %res2 = insertvalue { <8 x i32>, <8 x i32> } %res1, <8 x i32> %6, 1 + ret 
{ <8 x i32>, <8 x i32> } %res2 } declare <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32>, <4 x i32>, <4 x i32>) @@ -141,25 +144,25 @@ ret <4 x i32> %1 } -define <4 x i32>@test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { +define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpbusds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_128: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] -; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] -; X86-NEXT: vpdpbusds (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x51,0x18] -; X86-NEXT: vpdpbusds %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xc2] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X86-NEXT: vpdpbusds (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x51,0x00] +; X86-NEXT: vpdpbusds %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xda] +; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusds_128: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] -; X64-NEXT: vpdpbusds (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x51,0x1f] -; X64-NEXT: vpdpbusds %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xc2] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xe1,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpbusds (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x51,0x07] +; X64-NEXT: vpdpbusds %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x51,0xda] +; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i32>, <4 x i32>* %x2p %1 = call <4 x i32> @llvm.x86.avx512.vpdpbusds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) @@ -170,8 +173,9 @@ %5 = bitcast i8 %x3 to <8 x i1> %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> %6 = select <4 x i1> %extract1, <4 x i32> %4, <4 x i32> zeroinitializer - %res3 = add <4 x i32> %3, %6 - ret <4 x i32> %res3 + %res1 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0 + %res2 = insertvalue { <4 x i32>, <4 x i32> } %res1, <4 x i32> %6, 1 + ret { <4 x i32>, <4 x i32> } %res2 } declare <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32>, <8 x i32>, <8 x i32>) @@ -185,25 +189,25 @@ ret <8 x i32> %1 } -define <8 x i32>@test_int_x86_avx512_mask_vpdpwssd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { +define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpwssd_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpwssd_256: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] -; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] -; X86-NEXT: vpdpwssd (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x52,0x18] -; X86-NEXT: vpdpwssd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: 
[0x62,0xf2,0x75,0xa9,0x52,0xc2] -; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X86-NEXT: vpdpwssd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x52,0x00] +; X86-NEXT: vpdpwssd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x52,0xda] +; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssd_256: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] -; X64-NEXT: vpdpwssd (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x52,0x1f] -; X64-NEXT: vpdpwssd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x52,0xc2] -; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpwssd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x52,0x07] +; X64-NEXT: vpdpwssd %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x52,0xda] +; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i32>, <8 x i32>* %x2p %1 = call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) @@ -212,8 +216,9 @@ %4 = call <8 x i32> @llvm.x86.avx512.vpdpwssd.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4) %5 = bitcast i8 %x3 to <8 x i1> %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer - %res3 = add <8 x i32> %3, %6 - ret <8 x i32> %res3 + %res1 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0 + %res2 = insertvalue { <8 x i32>, <8 x i32> } %res1, <8 x i32> %6, 1 + ret { <8 x i32>, <8 x i32> } %res2 } declare <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32>, <4 x i32>, <4 x i32>) @@ 
-227,25 +232,25 @@ ret <4 x i32> %1 } -define <4 x i32>@test_int_x86_avx512_mask_vpdpwssd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { +define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpwssd_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpwssd_128: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] -; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] -; X86-NEXT: vpdpwssd (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x52,0x18] -; X86-NEXT: vpdpwssd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x52,0xc2] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X86-NEXT: vpdpwssd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x52,0x00] +; X86-NEXT: vpdpwssd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x52,0xda] +; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssd_128: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] -; X64-NEXT: vpdpwssd (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x52,0x1f] -; X64-NEXT: vpdpwssd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x52,0xc2] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpwssd (%rdi), %xmm1, %xmm0 
{%k1} # encoding: [0x62,0xf2,0x75,0x09,0x52,0x07] +; X64-NEXT: vpdpwssd %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x52,0xda] +; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i32>, <4 x i32>* %x2p %1 = call <4 x i32> @llvm.x86.avx512.vpdpwssd.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) @@ -256,8 +261,9 @@ %5 = bitcast i8 %x3 to <8 x i1> %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> %6 = select <4 x i1> %extract1, <4 x i32> %4, <4 x i32> zeroinitializer - %res3 = add <4 x i32> %3, %6 - ret <4 x i32> %res3 + %res1 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0 + %res2 = insertvalue { <4 x i32>, <4 x i32> } %res1, <4 x i32> %6, 1 + ret { <4 x i32>, <4 x i32> } %res2 } declare <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32>, <8 x i32>, <8 x i32>) @@ -271,25 +277,25 @@ ret <8 x i32> %1 } -define <8 x i32>@test_int_x86_avx512_mask_vpdpwssds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { +define { <8 x i32>, <8 x i32> } @test_int_x86_avx512_mask_vpdpwssds_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>* %x2p, <8 x i32> %x4, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpwssds_256: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] -; X86-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] -; X86-NEXT: vpdpwssds (%eax), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x53,0x18] -; X86-NEXT: vpdpwssds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x53,0xc2] -; X86-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X86-NEXT: 
vpdpwssds (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x53,0x00] +; X86-NEXT: vpdpwssds %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x53,0xda] +; X86-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssds_256: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8] -; X64-NEXT: vpdpwssds (%rdi), %ymm1, %ymm3 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x53,0x1f] -; X64-NEXT: vpdpwssds %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x53,0xc2] -; X64-NEXT: vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpwssds (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x53,0x07] +; X64-NEXT: vpdpwssds %ymm2, %ymm1, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x53,0xda] +; X64-NEXT: vmovdqa %ymm3, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <8 x i32>, <8 x i32>* %x2p %1 = call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2) @@ -298,8 +304,9 @@ %4 = call <8 x i32> @llvm.x86.avx512.vpdpwssds.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x4) %5 = bitcast i8 %x3 to <8 x i1> %6 = select <8 x i1> %5, <8 x i32> %4, <8 x i32> zeroinitializer - %res3 = add <8 x i32> %3, %6 - ret <8 x i32> %res3 + %res1 = insertvalue { <8 x i32>, <8 x i32> } poison, <8 x i32> %3, 0 + %res2 = insertvalue { <8 x i32>, <8 x i32> } %res1, <8 x i32> %6, 1 + ret { <8 x i32>, <8 x i32> } %res2 } declare <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32>, <4 x i32>, <4 x i32>) @@ -320,25 +327,25 @@ ret <4 x i32> %1 } -define <4 x i32>@test_int_x86_avx512_mask_vpdpwssds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x 
i32>* %x2p, <4 x i32> %x4, i8 %x3) { +define { <4 x i32>, <4 x i32> } @test_int_x86_avx512_mask_vpdpwssds_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>* %x2p, <4 x i32> %x4, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpwssds_128: ; X86: # %bb.0: +; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] ; X86-NEXT: kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9] -; X86-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] -; X86-NEXT: vpdpwssds (%eax), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x53,0x18] -; X86-NEXT: vpdpwssds %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x53,0xc2] -; X86-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X86-NEXT: vpdpwssds (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x53,0x00] +; X86-NEXT: vpdpwssds %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x53,0xda] +; X86-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssds_128: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8] -; X64-NEXT: vpdpwssds (%rdi), %xmm1, %xmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x53,0x1f] -; X64-NEXT: vpdpwssds %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0x53,0xc2] -; X64-NEXT: vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpwssds (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0x53,0x07] +; X64-NEXT: vpdpwssds %xmm2, %xmm1, %xmm3 {%k1} {z} # encoding: 
[0x62,0xf2,0x75,0x89,0x53,0xda] +; X64-NEXT: vmovdqa %xmm3, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <4 x i32>, <4 x i32>* %x2p %1 = call <4 x i32> @llvm.x86.avx512.vpdpwssds.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2) @@ -349,6 +356,7 @@ %5 = bitcast i8 %x3 to <8 x i1> %extract1 = shufflevector <8 x i1> %5, <8 x i1> %5, <4 x i32> %6 = select <4 x i1> %extract1, <4 x i32> %4, <4 x i32> zeroinitializer - %res3 = add <4 x i32> %3, %6 - ret <4 x i32> %res3 + %res1 = insertvalue { <4 x i32>, <4 x i32> } poison, <4 x i32> %3, 0 + %res2 = insertvalue { <4 x i32>, <4 x i32> } %res1, <4 x i32> %6, 1 + ret { <4 x i32>, <4 x i32> } %res2 } diff --git a/llvm/test/CodeGen/X86/avx512vnni-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512vnni-intrinsics-upgrade.ll --- a/llvm/test/CodeGen/X86/avx512vnni-intrinsics-upgrade.ll +++ b/llvm/test/CodeGen/X86/avx512vnni-intrinsics-upgrade.ll @@ -14,30 +14,31 @@ ret <16 x i32> %res } -define <16 x i32>@test_int_x86_avx512_mask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { +define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_512: ; X86: # %bb.0: +; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] -; X86-NEXT: vpdpbusd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x50,0x18] -; X86-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xc2] -; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X86-NEXT: vpdpbusd (%eax), %zmm1, %zmm0 {%k1} # 
encoding: [0x62,0xf2,0x75,0x49,0x50,0x00] +; X86-NEXT: vpdpbusd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xda] +; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusd_512: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] -; X64-NEXT: vpdpbusd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x50,0x1f] -; X64-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xc2] -; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpbusd (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x50,0x07] +; X64-NEXT: vpdpbusd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xda] +; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <16 x i32>, <16 x i32>* %x2p - %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) + %res0 = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 %x3) - %res2 = add <16 x i32> %res, %res1 - ret <16 x i32> %res2 + %res2 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0 + %res3 = insertvalue { <16 x i32>, <16 x i32> } %res2, <16 x i32> %res1, 1 + ret { <16 x i32>, <16 x i32> } %res3 } declare <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) @@ -52,30 +53,31 @@ ret <16 x i32> %res } -define <16 x i32>@test_int_x86_avx512_mask_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, 
<16 x i32> %x4, i16 %x3) { +define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_512: ; X86: # %bb.0: +; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] -; X86-NEXT: vpdpbusds (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x51,0x18] -; X86-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x51,0xc2] -; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X86-NEXT: vpdpbusds (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x51,0x00] +; X86-NEXT: vpdpbusds %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x51,0xda] +; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusds_512: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] -; X64-NEXT: vpdpbusds (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x51,0x1f] -; X64-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x51,0xc2] -; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpbusds (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x51,0x07] +; X64-NEXT: vpdpbusds %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x51,0xda] +; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X64-NEXT: retq # encoding: 
[0xc3] %x2 = load <16 x i32>, <16 x i32>* %x2p - %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) + %res0 = call <16 x i32> @llvm.x86.avx512.mask.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 %x3) - %res2 = add <16 x i32> %res, %res1 - ret <16 x i32> %res2 + %res2 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0 + %res3 = insertvalue { <16 x i32>, <16 x i32> } %res2, <16 x i32> %res1, 1 + ret { <16 x i32>, <16 x i32> } %res3 } declare <16 x i32> @llvm.x86.avx512.mask.vpdpwssd.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) @@ -90,30 +92,31 @@ ret <16 x i32> %res } -define <16 x i32>@test_int_x86_avx512_mask_vpdpwssd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { +define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpwssd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpwssd_512: ; X86: # %bb.0: +; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] -; X86-NEXT: vpdpwssd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x52,0x18] -; X86-NEXT: vpdpwssd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x52,0xc2] -; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X86-NEXT: vpdpwssd (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x52,0x00] +; X86-NEXT: vpdpwssd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x52,0xda] +; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: 
[0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssd_512: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] -; X64-NEXT: vpdpwssd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x52,0x1f] -; X64-NEXT: vpdpwssd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x52,0xc2] -; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpwssd (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x52,0x07] +; X64-NEXT: vpdpwssd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x52,0xda] +; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <16 x i32>, <16 x i32>* %x2p - %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) + %res0 = call <16 x i32> @llvm.x86.avx512.mask.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 %x3) - %res2 = add <16 x i32> %res, %res1 - ret <16 x i32> %res2 + %res2 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0 + %res3 = insertvalue { <16 x i32>, <16 x i32> } %res2, <16 x i32> %res1, 1 + ret { <16 x i32>, <16 x i32> } %res3 } declare <16 x i32> @llvm.x86.avx512.mask.vpdpwssds.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) @@ -128,29 +131,30 @@ ret <16 x i32> %res } -define <16 x i32>@test_int_x86_avx512_mask_vpdpwssds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { +define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpwssds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { ; 
X86-LABEL: test_int_x86_avx512_mask_vpdpwssds_512: ; X86: # %bb.0: +; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] -; X86-NEXT: vpdpwssds (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x53,0x18] -; X86-NEXT: vpdpwssds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x53,0xc2] -; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X86-NEXT: vpdpwssds (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x53,0x00] +; X86-NEXT: vpdpwssds %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x53,0xda] +; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssds_512: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] -; X64-NEXT: vpdpwssds (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x53,0x1f] -; X64-NEXT: vpdpwssds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x53,0xc2] -; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpwssds (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x53,0x07] +; X64-NEXT: vpdpwssds %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x53,0xda] +; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <16 x i32>, <16 x i32>* %x2p - %res = call <16 x i32> @llvm.x86.avx512.mask.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) + %res0 = call 
<16 x i32> @llvm.x86.avx512.mask.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 %x3) - %res2 = add <16 x i32> %res, %res1 - ret <16 x i32> %res2 + %res2 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0 + %res3 = insertvalue { <16 x i32>, <16 x i32> } %res2, <16 x i32> %res1, 1 + ret { <16 x i32>, <16 x i32> } %res3 } diff --git a/llvm/test/CodeGen/X86/avx512vnni-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vnni-intrinsics.ll --- a/llvm/test/CodeGen/X86/avx512vnni-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vnni-intrinsics.ll @@ -4,7 +4,7 @@ declare <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32>, <16 x i32>, <16 x i32>) -define <16 x i32>@test_int_x86_avx512_ask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) { +define <16 x i32> @test_int_x86_avx512_ask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) { ; CHECK-LABEL: test_int_x86_avx512_ask_vpdpbusd_512: ; CHECK: # %bb.0: ; CHECK-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x50,0xc2] @@ -13,24 +13,24 @@ ret <16 x i32> %1 } -define <16 x i32>@test_int_x86_avx512_mask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { +define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpbusd_512: ; X86: # %bb.0: +; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] -; X86-NEXT: vpdpbusd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x50,0x18] -; X86-NEXT: vpdpbusd %zmm2, 
%zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xc2] -; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X86-NEXT: vpdpbusd (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x50,0x00] +; X86-NEXT: vpdpbusd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xda] +; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusd_512: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] -; X64-NEXT: vpdpbusd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x50,0x1f] -; X64-NEXT: vpdpbusd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xc2] -; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpbusd (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x50,0x07] +; X64-NEXT: vpdpbusd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x50,0xda] +; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <16 x i32>, <16 x i32>* %x2p %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) @@ -39,8 +39,9 @@ %4 = call <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4) %5 = bitcast i16 %x3 to <16 x i1> %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer - %res3 = add <16 x i32> %3, %6 - ret <16 x i32> %res3 + %res1 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0 + %res2 = insertvalue { <16 x i32>, <16 x i32> } %res1, <16 x i32> %6, 1 + ret { <16 x i32>, <16 x i32> } %res2 } declare <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32>, <16 x i32>, <16 x i32>) @@ 
-54,24 +55,24 @@ ret <16 x i32> %1 } -define <16 x i32>@test_int_x86_avx512_mask_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { +define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpbusds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpbusds_512: ; X86: # %bb.0: +; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] -; X86-NEXT: vpdpbusds (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x51,0x18] -; X86-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x51,0xc2] -; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X86-NEXT: vpdpbusds (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x51,0x00] +; X86-NEXT: vpdpbusds %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x51,0xda] +; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpbusds_512: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] -; X64-NEXT: vpdpbusds (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x51,0x1f] -; X64-NEXT: vpdpbusds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x51,0xc2] -; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpbusds (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x51,0x07] +; X64-NEXT: vpdpbusds %zmm2, %zmm1, %zmm3 {%k1} {z} # 
encoding: [0x62,0xf2,0x75,0xc9,0x51,0xda] +; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <16 x i32>, <16 x i32>* %x2p %1 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) @@ -80,8 +81,9 @@ %4 = call <16 x i32> @llvm.x86.avx512.vpdpbusds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4) %5 = bitcast i16 %x3 to <16 x i1> %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer - %res3 = add <16 x i32> %3, %6 - ret <16 x i32> %res3 + %res1 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0 + %res2 = insertvalue { <16 x i32>, <16 x i32> } %res1, <16 x i32> %6, 1 + ret { <16 x i32>, <16 x i32> } %res2 } declare <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32>, <16 x i32>, <16 x i32>) @@ -95,24 +97,24 @@ ret <16 x i32> %1 } -define <16 x i32>@test_int_x86_avx512_mask_vpdpwssd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { +define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpwssd_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpwssd_512: ; X86: # %bb.0: +; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] -; X86-NEXT: vpdpwssd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x52,0x18] -; X86-NEXT: vpdpwssd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x52,0xc2] -; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X86-NEXT: vpdpwssd (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x52,0x00] +; X86-NEXT: vpdpwssd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: 
[0x62,0xf2,0x75,0xc9,0x52,0xda] +; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssd_512: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] -; X64-NEXT: vpdpwssd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x52,0x1f] -; X64-NEXT: vpdpwssd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x52,0xc2] -; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpwssd (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x52,0x07] +; X64-NEXT: vpdpwssd %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x52,0xda] +; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <16 x i32>, <16 x i32>* %x2p %1 = call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) @@ -121,8 +123,9 @@ %4 = call <16 x i32> @llvm.x86.avx512.vpdpwssd.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4) %5 = bitcast i16 %x3 to <16 x i1> %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer - %res3 = add <16 x i32> %3, %6 - ret <16 x i32> %res3 + %res1 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0 + %res2 = insertvalue { <16 x i32>, <16 x i32> } %res1, <16 x i32> %6, 1 + ret { <16 x i32>, <16 x i32> } %res2 } declare <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32>, <16 x i32>, <16 x i32>) @@ -136,24 +139,24 @@ ret <16 x i32> %1 } -define <16 x i32>@test_int_x86_avx512_mask_vpdpwssds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) { +define { <16 x i32>, <16 x i32> } @test_int_x86_avx512_mask_vpdpwssds_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x 
i32> %x4, i16 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpdpwssds_512: ; X86: # %bb.0: +; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] -; X86-NEXT: vpdpwssds (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x53,0x18] -; X86-NEXT: vpdpwssds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x53,0xc2] -; X86-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X86-NEXT: vpdpwssds (%eax), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x53,0x00] +; X86-NEXT: vpdpwssds %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x53,0xda] +; X86-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_int_x86_avx512_mask_vpdpwssds_512: ; X64: # %bb.0: -; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] ; X64-NEXT: vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8] -; X64-NEXT: vpdpwssds (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x53,0x1f] -; X64-NEXT: vpdpwssds %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x53,0xc2] -; X64-NEXT: vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0] +; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce] +; X64-NEXT: vpdpwssds (%rdi), %zmm1, %zmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x53,0x07] +; X64-NEXT: vpdpwssds %zmm2, %zmm1, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x53,0xda] +; X64-NEXT: vmovdqa64 %zmm3, %zmm1 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xcb] ; X64-NEXT: retq # encoding: [0xc3] %x2 = load <16 x i32>, <16 x i32>* %x2p %1 = call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) @@ -162,6 
+165,7 @@ %4 = call <16 x i32> @llvm.x86.avx512.vpdpwssds.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4) %5 = bitcast i16 %x3 to <16 x i1> %6 = select <16 x i1> %5, <16 x i32> %4, <16 x i32> zeroinitializer - %res3 = add <16 x i32> %3, %6 - ret <16 x i32> %res3 + %res1 = insertvalue { <16 x i32>, <16 x i32> } poison, <16 x i32> %3, 0 + %res2 = insertvalue { <16 x i32>, <16 x i32> } %res1, <16 x i32> %6, 1 + ret { <16 x i32>, <16 x i32> } %res2 } diff --git a/llvm/test/MC/Disassembler/AArch64/armv8.5a-specrestrict.txt b/llvm/test/MC/Disassembler/AArch64/armv8.5a-specrestrict.txt --- a/llvm/test/MC/Disassembler/AArch64/armv8.5a-specrestrict.txt +++ b/llvm/test/MC/Disassembler/AArch64/armv8.5a-specrestrict.txt @@ -1,6 +1,6 @@ # RUN: llvm-mc -triple=aarch64 -mattr=+specrestrict -disassemble < %s | FileCheck %s # RUN: llvm-mc -triple=aarch64 -mattr=+v8.5a -disassemble < %s | FileCheck %s -# RUN: llvm-mc -triple=aarch64 -mattr=+v8r -disassemble < %s | FileCheck %s --check-prefix=NOSPECID +# RUN: llvm-mc -triple=aarch64 -mattr=+v8r -disassemble < %s | FileCheck %s # RUN: llvm-mc -triple=aarch64 -mattr=-specrestrict -disassemble < %s | FileCheck %s --check-prefix=NOSPECID [0x81 0x03 0x38 0xd5] diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll @@ -28,7 +28,7 @@ ; IS__CGSCC_NPM-NEXT: [[X_PRIV:%.*]] = alloca i32, align 4 ; IS__CGSCC_NPM-NEXT: store i32 [[TMP0]], i32* [[X_PRIV]], align 4 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[X_PRIV]], align 4 -; IS__CGSCC_NPM-NEXT: ret i32 [[TMP0]] +; IS__CGSCC_NPM-NEXT: ret i32 [[TMP2]] ; entry: %tmp2 = load i32, i32* %x, align 4 @@ -76,7 +76,7 @@ ;. 
; IS__CGSCC_OPM: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } ; IS__CGSCC_OPM: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; IS__CGSCC_OPM: attributes #[[ATTR2]] = { nounwind readonly willreturn } +; IS__CGSCC_OPM: attributes #[[ATTR2]] = { nosync nounwind readonly willreturn } ;. ; IS__CGSCC_NPM: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll @@ -104,25 +104,15 @@ ; IS________OPM-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]], align 32 ; IS________OPM-NEXT: ret void ; -; IS__TUNIT_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@promote_avx2 -; IS__TUNIT_NPM-SAME: (<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]]) #[[ATTR0]] { -; IS__TUNIT_NPM-NEXT: bb: -; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <4 x i64>, align 32 -; IS__TUNIT_NPM-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* [[ARG1_PRIV]], align 32 -; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1_PRIV]], align 32 -; IS__TUNIT_NPM-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]], align 32 -; IS__TUNIT_NPM-NEXT: ret void -; -; IS__CGSCC_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@promote_avx2 -; IS__CGSCC_NPM-SAME: (<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]]) #[[ATTR0]] { -; IS__CGSCC_NPM-NEXT: bb: -; IS__CGSCC_NPM-NEXT: 
[[ARG1_PRIV:%.*]] = alloca <4 x i64>, align 32 -; IS__CGSCC_NPM-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* [[ARG1_PRIV]], align 32 -; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1_PRIV]], align 32 -; IS__CGSCC_NPM-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* [[ARG]], align 32 -; IS__CGSCC_NPM-NEXT: ret void +; IS________NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; IS________NPM-LABEL: define {{[^@]+}}@promote_avx2 +; IS________NPM-SAME: (<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]]) #[[ATTR0]] { +; IS________NPM-NEXT: bb: +; IS________NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <4 x i64>, align 32 +; IS________NPM-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* [[ARG1_PRIV]], align 32 +; IS________NPM-NEXT: [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1_PRIV]], align 32 +; IS________NPM-NEXT: store <4 x i64> [[TMP]], <4 x i64>* [[ARG]], align 32 +; IS________NPM-NEXT: ret void ; bb: %tmp = load <4 x i64>, <4 x i64>* %arg1 @@ -181,8 +171,7 @@ ; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR3]] ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]], align 32 ; IS__CGSCC_NPM-NEXT: call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree noundef nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]]) #[[ATTR4]] -; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32 -; IS__CGSCC_NPM-NEXT: store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2 +; IS__CGSCC_NPM-NEXT: store <4 x i64> [[TMP0]], <4 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void ; bb: diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll 
b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll @@ -19,25 +19,15 @@ ; IS________OPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 ; IS________OPM-NEXT: ret void ; -; IS__TUNIT_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 -; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { -; IS__TUNIT_NPM-NEXT: bb: -; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 -; IS__TUNIT_NPM-NEXT: ret void -; -; IS__CGSCC_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 -; IS__CGSCC_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { -; IS__CGSCC_NPM-NEXT: bb: -; IS__CGSCC_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 64 -; IS__CGSCC_NPM-NEXT: ret void +; IS________NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync 
nounwind willreturn uwtable +; IS________NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 +; IS________NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { +; IS________NPM-NEXT: bb: +; IS________NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; IS________NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 +; IS________NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64 +; IS________NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 +; IS________NPM-NEXT: ret void ; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 @@ -97,8 +87,7 @@ ; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6:[0-9]+]] ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7:[0-9]+]] -; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 -; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void ; bb: @@ -123,25 +112,15 @@ ; IS________OPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 ; IS________OPM-NEXT: ret void ; -; IS__TUNIT_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 -; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 
dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { -; IS__TUNIT_NPM-NEXT: bb: -; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 -; IS__TUNIT_NPM-NEXT: ret void -; -; IS__CGSCC_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 -; IS__CGSCC_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { -; IS__CGSCC_NPM-NEXT: bb: -; IS__CGSCC_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 64 -; IS__CGSCC_NPM-NEXT: ret void +; IS________NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; IS________NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 +; IS________NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { +; IS________NPM-NEXT: bb: +; IS________NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; IS________NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 +; IS________NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64 +; IS________NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 +; IS________NPM-NEXT: ret void ; bb: %tmp = 
load <8 x i64>, <8 x i64>* %arg1 @@ -201,8 +180,7 @@ ; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]] ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]] -; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 -; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void ; bb: @@ -227,25 +205,15 @@ ; IS________OPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 ; IS________OPM-NEXT: ret void ; -; IS__TUNIT_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 -; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1]] { -; IS__TUNIT_NPM-NEXT: bb: -; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 -; IS__TUNIT_NPM-NEXT: ret void -; -; IS__CGSCC_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 -; IS__CGSCC_NPM-SAME: (<8 x i64>* noalias 
nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1]] { -; IS__CGSCC_NPM-NEXT: bb: -; IS__CGSCC_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 64 -; IS__CGSCC_NPM-NEXT: ret void +; IS________NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; IS________NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 +; IS________NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1]] { +; IS________NPM-NEXT: bb: +; IS________NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; IS________NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 +; IS________NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64 +; IS________NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 +; IS________NPM-NEXT: ret void ; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 @@ -305,8 +273,7 @@ ; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]] ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]] -; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 -; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 
2 +; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void ; bb: @@ -331,25 +298,15 @@ ; IS________OPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 ; IS________OPM-NEXT: ret void ; -; IS__TUNIT_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 -; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0]] { -; IS__TUNIT_NPM-NEXT: bb: -; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 -; IS__TUNIT_NPM-NEXT: ret void -; -; IS__CGSCC_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 -; IS__CGSCC_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0]] { -; IS__CGSCC_NPM-NEXT: bb: -; IS__CGSCC_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 64 -; IS__CGSCC_NPM-NEXT: ret void +; IS________NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; IS________NPM-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 +; IS________NPM-SAME: 
(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0]] { +; IS________NPM-NEXT: bb: +; IS________NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; IS________NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 +; IS________NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64 +; IS________NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 +; IS________NPM-NEXT: ret void ; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 @@ -409,8 +366,7 @@ ; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]] ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]] -; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 -; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void ; bb: @@ -437,7 +393,7 @@ ; ; IS________NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable ; IS________NPM-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 -; IS________NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) #[[ATTR1:[0-9]+]] { +; IS________NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x 
i64>* noalias nocapture nofree noundef nonnull readonly align 64 dereferenceable(64) [[ARG1:%.*]]) #[[ATTR1]] { ; IS________NPM-NEXT: bb: ; IS________NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 64 ; IS________NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 @@ -615,25 +571,15 @@ ; IS________OPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 ; IS________OPM-NEXT: ret void ; -; IS__TUNIT_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 -; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] { -; IS__TUNIT_NPM-NEXT: bb: -; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 -; IS__TUNIT_NPM-NEXT: ret void -; -; IS__CGSCC_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 -; IS__CGSCC_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] { -; IS__CGSCC_NPM-NEXT: bb: -; IS__CGSCC_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 64 -; IS__CGSCC_NPM-NEXT: ret void +; IS________NPM: Function Attrs: argmemonly 
inlinehint nofree norecurse nosync nounwind willreturn uwtable +; IS________NPM-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 +; IS________NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] { +; IS________NPM-NEXT: bb: +; IS________NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; IS________NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 +; IS________NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64 +; IS________NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 +; IS________NPM-NEXT: ret void ; bb: %tmp = load <8 x i64>, <8 x i64>* %arg1 @@ -693,8 +639,7 @@ ; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]] ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]] -; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 -; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void ; bb: @@ -719,25 +664,15 @@ ; IS________OPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 ; IS________OPM-NEXT: ret void ; -; IS__TUNIT_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 -; IS__TUNIT_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 
64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR4]] { -; IS__TUNIT_NPM-NEXT: bb: -; IS__TUNIT_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__TUNIT_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__TUNIT_NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 -; IS__TUNIT_NPM-NEXT: ret void -; -; IS__CGSCC_NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 -; IS__CGSCC_NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR4]] { -; IS__CGSCC_NPM-NEXT: bb: -; IS__CGSCC_NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__CGSCC_NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64 -; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 64 -; IS__CGSCC_NPM-NEXT: ret void +; IS________NPM: Function Attrs: argmemonly inlinehint nofree norecurse nosync nounwind willreturn uwtable +; IS________NPM-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 +; IS________NPM-SAME: (<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR4:[0-9]+]] { +; IS________NPM-NEXT: bb: +; IS________NPM-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; IS________NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]], align 64 +; IS________NPM-NEXT: [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 64 +; IS________NPM-NEXT: store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 64 +; IS________NPM-NEXT: ret void ; bb: %tmp = load <8 x i64>, <8 x 
i64>* %arg1 @@ -797,8 +732,7 @@ ; IS__CGSCC_NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP3]], i8 noundef 0, i64 noundef 32, i1 noundef false) #[[ATTR6]] ; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 64 ; IS__CGSCC_NPM-NEXT: call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree noundef nonnull writeonly align 64 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]]) #[[ATTR7]] -; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 64 -; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2 +; IS__CGSCC_NPM-NEXT: store <8 x i64> [[TMP0]], <8 x i64>* [[ARG]], align 2 ; IS__CGSCC_NPM-NEXT: ret void ; bb: diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll @@ -85,5 +85,5 @@ ;. ; IS__CGSCC____: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind readonly willreturn } ; IS__CGSCC____: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; IS__CGSCC____: attributes #[[ATTR2]] = { nounwind readonly willreturn } +; IS__CGSCC____: attributes #[[ATTR2]] = { nosync nounwind readonly willreturn } ;. 
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll @@ -78,5 +78,5 @@ ; IS__CGSCC____: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly } ; IS__CGSCC____: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } ; IS__CGSCC____: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn writeonly } -; IS__CGSCC____: attributes #[[ATTR3]] = { nounwind willreturn writeonly } +; IS__CGSCC____: attributes #[[ATTR3]] = { nosync nounwind willreturn writeonly } ;. diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/variadic.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/variadic.ll --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/variadic.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/variadic.ll @@ -35,7 +35,7 @@ ; Function Attrs: nounwind uwtable define internal void @callee_t0f(i8* nocapture readnone %tp13, i8* nocapture readnone %tp14, i8* nocapture readnone %tp15, i8* nocapture readnone %tp16, i8* nocapture readnone %tp17, ...) { ; CHECK-LABEL: define {{[^@]+}}@callee_t0f -; CHECK-SAME: (i8* noalias nocapture nofree nonnull readnone [[TP13:%.*]], i8* noalias nocapture nofree nonnull readnone [[TP14:%.*]], i8* noalias nocapture nofree nonnull readnone [[TP15:%.*]], i8* noalias nocapture nofree nonnull readnone [[TP16:%.*]], i8* noalias nocapture nofree nonnull readnone [[TP17:%.*]], ...) 
{ +; CHECK-SAME: (i8* noalias nocapture nofree nonnull readnone align 4294967296 [[TP13:%.*]], i8* noalias nocapture nofree nonnull readnone align 4294967296 [[TP14:%.*]], i8* noalias nocapture nofree nonnull readnone align 4294967296 [[TP15:%.*]], i8* noalias nocapture nofree nonnull readnone align 4294967296 [[TP16:%.*]], i8* noalias nocapture nofree nonnull readnone align 4294967296 [[TP17:%.*]], ...) { ; CHECK-NEXT: entry: ; CHECK-NEXT: call void @sink(i32 noundef 0) ; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll --- a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll @@ -120,8 +120,8 @@ ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 ; IS__CGSCC_NPM-NEXT: [[TMP4:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 0 ; IS__CGSCC_NPM-NEXT: [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 8 -; IS__CGSCC_NPM-NEXT: [[TMP6:%.*]] = zext i8 [[TMP0]] to i32 -; IS__CGSCC_NPM-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP1]] +; IS__CGSCC_NPM-NEXT: [[TMP6:%.*]] = zext i8 [[TMP5]] to i32 +; IS__CGSCC_NPM-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP3]] ; IS__CGSCC_NPM-NEXT: ret i32 [[TMP7]] ; entry: diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll b/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll --- a/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll @@ -32,22 +32,13 @@ define i64 @fn2b(i32 %arg) { ; -; IS________OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; IS________OPM-LABEL: define {{[^@]+}}@fn2b -; IS________OPM-SAME: (i32 [[ARG:%.*]]) #[[ATTR0]] { -; IS________OPM-NEXT: entry: -; IS________OPM-NEXT: [[CONV:%.*]] = sext i32 [[ARG]] to i64 -; IS________OPM-NEXT: 
[[DIV:%.*]] = sdiv i64 8, [[CONV]] -; IS________OPM-NEXT: [[CALL2:%.*]] = call i64 @fn1(i64 [[DIV]]) #[[ATTR1:[0-9]+]] -; IS________OPM-NEXT: ret i64 [[CALL2]] -; -; IS________NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; IS________NPM-LABEL: define {{[^@]+}}@fn2b -; IS________NPM-SAME: (i32 [[ARG:%.*]]) #[[ATTR0]] { -; IS________NPM-NEXT: entry: -; IS________NPM-NEXT: [[CONV:%.*]] = sext i32 [[ARG]] to i64 -; IS________NPM-NEXT: [[DIV:%.*]] = sdiv i64 8, [[CONV]] -; IS________NPM-NEXT: ret i64 [[DIV]] +; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; CHECK-LABEL: define {{[^@]+}}@fn2b +; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[ARG]] to i64 +; CHECK-NEXT: [[DIV:%.*]] = sdiv i64 8, [[CONV]] +; CHECK-NEXT: ret i64 [[DIV]] ; entry: %conv = sext i32 %arg to i64 @@ -79,17 +70,11 @@ } define internal i64 @fn1(i64 %p1) { -; IS________OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; IS________OPM-LABEL: define {{[^@]+}}@fn1 -; IS________OPM-SAME: (i64 returned [[P1:%.*]]) #[[ATTR0]] { -; IS________OPM-NEXT: entry: -; IS________OPM-NEXT: ret i64 [[P1]] -; -; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@fn1 -; IS__CGSCC_NPM-SAME: (i64 [[P1:%.*]]) #[[ATTR0]] { -; IS__CGSCC_NPM-NEXT: entry: -; IS__CGSCC_NPM-NEXT: ret i64 undef +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC____-LABEL: define {{[^@]+}}@fn1 +; IS__CGSCC____-SAME: (i64 [[P1:%.*]]) #[[ATTR0]] { +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: ret i64 undef ; entry: %tobool = icmp ne i64 %p1, 0 @@ -97,11 +82,5 @@ ret i64 %cond } ;. -; IS__TUNIT_OPM: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; IS__TUNIT_OPM: attributes #[[ATTR1]] = { nofree nosync nounwind readnone willreturn } -;. 
-; IS________NPM: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -;. -; IS__CGSCC_OPM: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } -; IS__CGSCC_OPM: attributes #[[ATTR1]] = { readnone willreturn } +; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll b/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll --- a/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll @@ -8,67 +8,35 @@ define void @fn2(i32* %P, i1 %C) { ; -; IS__TUNIT_OPM: Function Attrs: argmemonly nofree norecurse nosync nounwind -; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@fn2 -; IS__TUNIT_OPM-SAME: (i32* nocapture nofree [[P:%.*]], i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { -; IS__TUNIT_OPM-NEXT: entry: -; IS__TUNIT_OPM-NEXT: br label [[IF_END:%.*]] -; IS__TUNIT_OPM: for.cond1: -; IS__TUNIT_OPM-NEXT: br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]] -; IS__TUNIT_OPM: if.end: -; IS__TUNIT_OPM-NEXT: [[E_2:%.*]] = phi i32* [ [[P]], [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ] -; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[E_2]], align 4 -; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call i32 @fn1(i32 [[TMP0]]) #[[ATTR3:[0-9]+]] -; IS__TUNIT_OPM-NEXT: store i32 [[CALL]], i32* [[P]], align 4 -; IS__TUNIT_OPM-NEXT: br label [[FOR_COND1]] -; IS__TUNIT_OPM: exit: -; IS__TUNIT_OPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: argmemonly nofree norecurse nosync nounwind +; IS__TUNIT____-LABEL: define {{[^@]+}}@fn2 +; IS__TUNIT____-SAME: (i32* nocapture nofree [[P:%.*]], i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: br label [[IF_END:%.*]] +; IS__TUNIT____: for.cond1: +; IS__TUNIT____-NEXT: br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]] +; IS__TUNIT____: if.end: +; IS__TUNIT____-NEXT: [[E_2:%.*]] = phi i32* [ [[P]], 
[[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ] +; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* [[E_2]], align 4 +; IS__TUNIT____-NEXT: store i32 [[TMP0]], i32* [[P]], align 4 +; IS__TUNIT____-NEXT: br label [[FOR_COND1]] +; IS__TUNIT____: exit: +; IS__TUNIT____-NEXT: ret void ; -; IS__TUNIT_NPM: Function Attrs: argmemonly nofree norecurse nosync nounwind -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@fn2 -; IS__TUNIT_NPM-SAME: (i32* nocapture nofree [[P:%.*]], i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { -; IS__TUNIT_NPM-NEXT: entry: -; IS__TUNIT_NPM-NEXT: br label [[IF_END:%.*]] -; IS__TUNIT_NPM: for.cond1: -; IS__TUNIT_NPM-NEXT: br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]] -; IS__TUNIT_NPM: if.end: -; IS__TUNIT_NPM-NEXT: [[E_2:%.*]] = phi i32* [ [[P]], [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ] -; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[E_2]], align 4 -; IS__TUNIT_NPM-NEXT: store i32 [[TMP0]], i32* [[P]], align 4 -; IS__TUNIT_NPM-NEXT: br label [[FOR_COND1]] -; IS__TUNIT_NPM: exit: -; IS__TUNIT_NPM-NEXT: ret void -; -; IS__CGSCC_OPM: Function Attrs: argmemonly nofree norecurse nosync nounwind -; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@fn2 -; IS__CGSCC_OPM-SAME: (i32* nocapture nofree nonnull align 4 dereferenceable(4) [[P:%.*]], i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { -; IS__CGSCC_OPM-NEXT: entry: -; IS__CGSCC_OPM-NEXT: br label [[IF_END:%.*]] -; IS__CGSCC_OPM: for.cond1: -; IS__CGSCC_OPM-NEXT: br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]] -; IS__CGSCC_OPM: if.end: -; IS__CGSCC_OPM-NEXT: [[E_2:%.*]] = phi i32* [ [[P]], [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ] -; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[E_2]], align 4 -; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call i32 @fn1(i32 [[TMP0]]) -; IS__CGSCC_OPM-NEXT: store i32 [[CALL]], i32* [[P]], align 4 -; IS__CGSCC_OPM-NEXT: br label [[FOR_COND1]] -; IS__CGSCC_OPM: exit: -; IS__CGSCC_OPM-NEXT: ret void -; -; IS__CGSCC_NPM: Function Attrs: argmemonly nofree norecurse nosync nounwind -; IS__CGSCC_NPM-LABEL: 
define {{[^@]+}}@fn2 -; IS__CGSCC_NPM-SAME: (i32* nocapture nofree nonnull align 4 dereferenceable(4) [[P:%.*]], i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { -; IS__CGSCC_NPM-NEXT: entry: -; IS__CGSCC_NPM-NEXT: br label [[IF_END:%.*]] -; IS__CGSCC_NPM: for.cond1: -; IS__CGSCC_NPM-NEXT: br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]] -; IS__CGSCC_NPM: if.end: -; IS__CGSCC_NPM-NEXT: [[E_2:%.*]] = phi i32* [ [[P]], [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ] -; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* [[E_2]], align 4 -; IS__CGSCC_NPM-NEXT: store i32 [[TMP0]], i32* [[P]], align 4 -; IS__CGSCC_NPM-NEXT: br label [[FOR_COND1]] -; IS__CGSCC_NPM: exit: -; IS__CGSCC_NPM-NEXT: ret void +; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind +; IS__CGSCC____-LABEL: define {{[^@]+}}@fn2 +; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull align 4 dereferenceable(4) [[P:%.*]], i1 [[C:%.*]]) #[[ATTR0:[0-9]+]] { +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: br label [[IF_END:%.*]] +; IS__CGSCC____: for.cond1: +; IS__CGSCC____-NEXT: br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]] +; IS__CGSCC____: if.end: +; IS__CGSCC____-NEXT: [[E_2:%.*]] = phi i32* [ [[P]], [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ] +; IS__CGSCC____-NEXT: [[TMP0:%.*]] = load i32, i32* [[E_2]], align 4 +; IS__CGSCC____-NEXT: store i32 [[TMP0]], i32* [[P]], align 4 +; IS__CGSCC____-NEXT: br label [[FOR_COND1]] +; IS__CGSCC____: exit: +; IS__CGSCC____-NEXT: ret void ; entry: br label %if.end @@ -87,17 +55,11 @@ } define internal i32 @fn1(i32 %p1) { -; IS________OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; IS________OPM-LABEL: define {{[^@]+}}@fn1 -; IS________OPM-SAME: (i32 returned [[P1:%.*]]) #[[ATTR1:[0-9]+]] { -; IS________OPM-NEXT: entry: -; IS________OPM-NEXT: ret i32 [[P1]] -; -; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@fn1 -; IS__CGSCC_NPM-SAME: (i32 
[[P1:%.*]]) #[[ATTR1:[0-9]+]] { -; IS__CGSCC_NPM-NEXT: entry: -; IS__CGSCC_NPM-NEXT: ret i32 undef +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC____-LABEL: define {{[^@]+}}@fn1 +; IS__CGSCC____-SAME: (i32 [[P1:%.*]]) #[[ATTR1:[0-9]+]] { +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: ret i32 undef ; entry: %tobool = icmp ne i32 %p1, 0 @@ -107,67 +69,35 @@ define void @fn_no_null_opt(i32* %P, i1 %C) null_pointer_is_valid { ; -; IS__TUNIT_OPM: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid -; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@fn_no_null_opt -; IS__TUNIT_OPM-SAME: (i32* nocapture nofree writeonly [[P:%.*]], i1 [[C:%.*]]) #[[ATTR2:[0-9]+]] { -; IS__TUNIT_OPM-NEXT: entry: -; IS__TUNIT_OPM-NEXT: br label [[IF_END:%.*]] -; IS__TUNIT_OPM: for.cond1: -; IS__TUNIT_OPM-NEXT: br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]] -; IS__TUNIT_OPM: if.end: -; IS__TUNIT_OPM-NEXT: [[E_2:%.*]] = phi i32* [ undef, [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ] -; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* null, align 4 -; IS__TUNIT_OPM-NEXT: [[CALL:%.*]] = call i32 @fn0(i32 [[TMP0]]) #[[ATTR3]] -; IS__TUNIT_OPM-NEXT: store i32 [[CALL]], i32* [[P]], align 4 -; IS__TUNIT_OPM-NEXT: br label [[FOR_COND1]] -; IS__TUNIT_OPM: exit: -; IS__TUNIT_OPM-NEXT: ret void -; -; IS__TUNIT_NPM: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@fn_no_null_opt -; IS__TUNIT_NPM-SAME: (i32* nocapture nofree writeonly [[P:%.*]], i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] { -; IS__TUNIT_NPM-NEXT: entry: -; IS__TUNIT_NPM-NEXT: br label [[IF_END:%.*]] -; IS__TUNIT_NPM: for.cond1: -; IS__TUNIT_NPM-NEXT: br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]] -; IS__TUNIT_NPM: if.end: -; IS__TUNIT_NPM-NEXT: [[E_2:%.*]] = phi i32* [ undef, [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ] -; IS__TUNIT_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* null, align 4 -; IS__TUNIT_NPM-NEXT: store 
i32 [[TMP0]], i32* [[P]], align 4 -; IS__TUNIT_NPM-NEXT: br label [[FOR_COND1]] -; IS__TUNIT_NPM: exit: -; IS__TUNIT_NPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid +; IS__TUNIT____-LABEL: define {{[^@]+}}@fn_no_null_opt +; IS__TUNIT____-SAME: (i32* nocapture nofree writeonly [[P:%.*]], i1 [[C:%.*]]) #[[ATTR1:[0-9]+]] { +; IS__TUNIT____-NEXT: entry: +; IS__TUNIT____-NEXT: br label [[IF_END:%.*]] +; IS__TUNIT____: for.cond1: +; IS__TUNIT____-NEXT: br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]] +; IS__TUNIT____: if.end: +; IS__TUNIT____-NEXT: [[E_2:%.*]] = phi i32* [ undef, [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ] +; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* null, align 4294967296 +; IS__TUNIT____-NEXT: store i32 [[TMP0]], i32* [[P]], align 4 +; IS__TUNIT____-NEXT: br label [[FOR_COND1]] +; IS__TUNIT____: exit: +; IS__TUNIT____-NEXT: ret void ; -; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid -; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@fn_no_null_opt -; IS__CGSCC_OPM-SAME: (i32* nocapture nofree writeonly align 4 dereferenceable_or_null(4) [[P:%.*]], i1 [[C:%.*]]) #[[ATTR2:[0-9]+]] { -; IS__CGSCC_OPM-NEXT: entry: -; IS__CGSCC_OPM-NEXT: br label [[IF_END:%.*]] -; IS__CGSCC_OPM: for.cond1: -; IS__CGSCC_OPM-NEXT: br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]] -; IS__CGSCC_OPM: if.end: -; IS__CGSCC_OPM-NEXT: [[E_2:%.*]] = phi i32* [ undef, [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ] -; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* null, align 4294967296 -; IS__CGSCC_OPM-NEXT: [[CALL:%.*]] = call i32 @fn0(i32 [[TMP0]]) -; IS__CGSCC_OPM-NEXT: store i32 [[CALL]], i32* [[P]], align 4 -; IS__CGSCC_OPM-NEXT: br label [[FOR_COND1]] -; IS__CGSCC_OPM: exit: -; IS__CGSCC_OPM-NEXT: ret void -; -; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@fn_no_null_opt -; IS__CGSCC_NPM-SAME: (i32* 
nocapture nofree writeonly align 4 dereferenceable_or_null(4) [[P:%.*]], i1 [[C:%.*]]) #[[ATTR2:[0-9]+]] { -; IS__CGSCC_NPM-NEXT: entry: -; IS__CGSCC_NPM-NEXT: br label [[IF_END:%.*]] -; IS__CGSCC_NPM: for.cond1: -; IS__CGSCC_NPM-NEXT: br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]] -; IS__CGSCC_NPM: if.end: -; IS__CGSCC_NPM-NEXT: [[E_2:%.*]] = phi i32* [ undef, [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ] -; IS__CGSCC_NPM-NEXT: [[TMP0:%.*]] = load i32, i32* null, align 4294967296 -; IS__CGSCC_NPM-NEXT: store i32 [[TMP0]], i32* [[P]], align 4 -; IS__CGSCC_NPM-NEXT: br label [[FOR_COND1]] -; IS__CGSCC_NPM: exit: -; IS__CGSCC_NPM-NEXT: ret void +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind null_pointer_is_valid +; IS__CGSCC____-LABEL: define {{[^@]+}}@fn_no_null_opt +; IS__CGSCC____-SAME: (i32* nocapture nofree writeonly align 4 dereferenceable_or_null(4) [[P:%.*]], i1 [[C:%.*]]) #[[ATTR2:[0-9]+]] { +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: br label [[IF_END:%.*]] +; IS__CGSCC____: for.cond1: +; IS__CGSCC____-NEXT: br i1 [[C]], label [[IF_END]], label [[EXIT:%.*]] +; IS__CGSCC____: if.end: +; IS__CGSCC____-NEXT: [[E_2:%.*]] = phi i32* [ undef, [[ENTRY:%.*]] ], [ null, [[FOR_COND1:%.*]] ] +; IS__CGSCC____-NEXT: [[TMP0:%.*]] = load i32, i32* null, align 4294967296 +; IS__CGSCC____-NEXT: store i32 [[TMP0]], i32* [[P]], align 4 +; IS__CGSCC____-NEXT: br label [[FOR_COND1]] +; IS__CGSCC____: exit: +; IS__CGSCC____-NEXT: ret void ; entry: br label %if.end @@ -186,17 +116,11 @@ } define internal i32 @fn0(i32 %p1) { -; IS________OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; IS________OPM-LABEL: define {{[^@]+}}@fn0 -; IS________OPM-SAME: (i32 returned [[P1:%.*]]) #[[ATTR1]] { -; IS________OPM-NEXT: entry: -; IS________OPM-NEXT: ret i32 [[P1]] -; -; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@fn0 -; IS__CGSCC_NPM-SAME: (i32 
[[P1:%.*]]) #[[ATTR1]] { -; IS__CGSCC_NPM-NEXT: entry: -; IS__CGSCC_NPM-NEXT: ret i32 undef +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC____-LABEL: define {{[^@]+}}@fn0 +; IS__CGSCC____-SAME: (i32 [[P1:%.*]]) #[[ATTR1]] { +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: ret i32 undef ; entry: %tobool = icmp ne i32 %p1, 0 @@ -204,15 +128,10 @@ ret i32 %cond } ;. -; IS__TUNIT_OPM: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind } -; IS__TUNIT_OPM: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; IS__TUNIT_OPM: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind null_pointer_is_valid } -; IS__TUNIT_OPM: attributes #[[ATTR3]] = { nofree nosync nounwind readnone } -;. -; IS__TUNIT_NPM: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind } -; IS__TUNIT_NPM: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind null_pointer_is_valid } +; IS__TUNIT____: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind } +; IS__TUNIT____: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind null_pointer_is_valid } ;. -; IS__CGSCC____: attributes #[[ATTR0:[0-9]+]] = { argmemonly nofree norecurse nosync nounwind } -; IS__CGSCC____: attributes #[[ATTR1:[0-9]+]] = { nofree norecurse nosync nounwind readnone willreturn } -; IS__CGSCC____: attributes #[[ATTR2:[0-9]+]] = { nofree norecurse nosync nounwind null_pointer_is_valid } +; IS__CGSCC____: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind } +; IS__CGSCC____: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } +; IS__CGSCC____: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind null_pointer_is_valid } ;. 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll --- a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=15 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=15 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=14 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=14 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal 
-attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM ; diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll b/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll --- a/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll +++ b/llvm/test/Transforms/Attributor/IPConstantProp/return-argument.ll @@ -6,33 +6,33 @@ ;; This function returns its second argument on all return statements define internal i32* @incdec(i1 %C, i32* %V) { -; NOT_CGSCC_NPM: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn -; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@incdec -; NOT_CGSCC_NPM-SAME: (i1 [[C:%.*]], i32* noalias nofree noundef nonnull returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[V:%.*]]) #[[ATTR0:[0-9]+]] { -; NOT_CGSCC_NPM-NEXT: [[X:%.*]] = load i32, i32* [[V]], align 4 -; NOT_CGSCC_NPM-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] -; NOT_CGSCC_NPM: T: -; NOT_CGSCC_NPM-NEXT: [[X1:%.*]] = add i32 [[X]], 1 -; NOT_CGSCC_NPM-NEXT: store i32 [[X1]], i32* [[V]], align 4 -; NOT_CGSCC_NPM-NEXT: ret i32* [[V]] -; NOT_CGSCC_NPM: F: -; NOT_CGSCC_NPM-NEXT: [[X2:%.*]] = sub i32 [[X]], 1 -; NOT_CGSCC_NPM-NEXT: store i32 [[X2]], i32* [[V]], align 4 -; NOT_CGSCC_NPM-NEXT: ret i32* [[V]] +; IS__TUNIT____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@incdec +; IS__TUNIT____-SAME: (i1 [[C:%.*]], i32* noalias nofree noundef nonnull returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[V:%.*]]) #[[ATTR0:[0-9]+]] { +; IS__TUNIT____-NEXT: [[X:%.*]] = load i32, i32* [[V]], align 4 +; IS__TUNIT____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; IS__TUNIT____: T: +; IS__TUNIT____-NEXT: [[X1:%.*]] = add i32 [[X]], 1 +; IS__TUNIT____-NEXT: store i32 [[X1]], i32* [[V]], align 4 +; IS__TUNIT____-NEXT: ret i32* [[V]] +; 
IS__TUNIT____: F: +; IS__TUNIT____-NEXT: [[X2:%.*]] = sub i32 [[X]], 1 +; IS__TUNIT____-NEXT: store i32 [[X2]], i32* [[V]], align 4 +; IS__TUNIT____-NEXT: ret i32* [[V]] ; -; IS__CGSCC_NPM: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@incdec -; IS__CGSCC_NPM-SAME: (i1 [[C:%.*]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) "no-capture-maybe-returned" [[V:%.*]]) #[[ATTR0:[0-9]+]] { -; IS__CGSCC_NPM-NEXT: [[X:%.*]] = load i32, i32* [[V]], align 4 -; IS__CGSCC_NPM-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] -; IS__CGSCC_NPM: T: -; IS__CGSCC_NPM-NEXT: [[X1:%.*]] = add i32 [[X]], 1 -; IS__CGSCC_NPM-NEXT: store i32 [[X1]], i32* [[V]], align 4 -; IS__CGSCC_NPM-NEXT: ret i32* undef -; IS__CGSCC_NPM: F: -; IS__CGSCC_NPM-NEXT: [[X2:%.*]] = sub i32 [[X]], 1 -; IS__CGSCC_NPM-NEXT: store i32 [[X2]], i32* [[V]], align 4 -; IS__CGSCC_NPM-NEXT: ret i32* undef +; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn +; IS__CGSCC____-LABEL: define {{[^@]+}}@incdec +; IS__CGSCC____-SAME: (i1 [[C:%.*]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) "no-capture-maybe-returned" [[V:%.*]]) #[[ATTR0:[0-9]+]] { +; IS__CGSCC____-NEXT: [[X:%.*]] = load i32, i32* [[V]], align 4 +; IS__CGSCC____-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; IS__CGSCC____: T: +; IS__CGSCC____-NEXT: [[X1:%.*]] = add i32 [[X]], 1 +; IS__CGSCC____-NEXT: store i32 [[X1]], i32* [[V]], align 4 +; IS__CGSCC____-NEXT: ret i32* undef +; IS__CGSCC____: F: +; IS__CGSCC____-NEXT: [[X2:%.*]] = sub i32 [[X]], 1 +; IS__CGSCC____-NEXT: store i32 [[X2]], i32* [[V]], align 4 +; IS__CGSCC____-NEXT: ret i32* undef ; %X = load i32, i32* %V br i1 %C, label %T, label %F @@ -66,81 +66,43 @@ } define void @caller(i1 %C) personality i32 (...)* @__gxx_personality_v0 { -; IS__TUNIT_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; IS__TUNIT_OPM-LABEL: define 
{{[^@]+}}@caller -; IS__TUNIT_OPM-SAME: (i1 [[C:%.*]]) #[[ATTR1]] personality i32 (...)* @__gxx_personality_v0 { -; IS__TUNIT_OPM-NEXT: [[Q:%.*]] = alloca i32, align 4 -; IS__TUNIT_OPM-NEXT: [[W:%.*]] = call align 4 i32* @incdec(i1 [[C]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q]]) #[[ATTR2:[0-9]+]] -; IS__TUNIT_OPM-NEXT: [[S1:%.*]] = call { i32, i32 } @foo(i32 noundef 1, i32 noundef 2) #[[ATTR3:[0-9]+]] -; IS__TUNIT_OPM-NEXT: [[X1:%.*]] = extractvalue { i32, i32 } [[S1]], 0 -; IS__TUNIT_OPM-NEXT: [[S2:%.*]] = call { i32, i32 } @foo(i32 noundef 3, i32 noundef 4) #[[ATTR3]] -; IS__TUNIT_OPM-NEXT: br label [[OK:%.*]] -; IS__TUNIT_OPM: OK: -; IS__TUNIT_OPM-NEXT: [[X2:%.*]] = extractvalue { i32, i32 } [[S2]], 0 -; IS__TUNIT_OPM-NEXT: [[Z:%.*]] = add i32 [[X1]], [[X2]] -; IS__TUNIT_OPM-NEXT: store i32 [[Z]], i32* [[W]], align 4 -; IS__TUNIT_OPM-NEXT: br label [[RET:%.*]] -; IS__TUNIT_OPM: LPAD: -; IS__TUNIT_OPM-NEXT: unreachable -; IS__TUNIT_OPM: RET: -; IS__TUNIT_OPM-NEXT: ret void +; IS__TUNIT____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__TUNIT____-LABEL: define {{[^@]+}}@caller +; IS__TUNIT____-SAME: (i1 [[C:%.*]]) #[[ATTR1]] personality i32 (...)* @__gxx_personality_v0 { +; IS__TUNIT____-NEXT: [[Q:%.*]] = alloca i32, align 4 +; IS__TUNIT____-NEXT: [[W:%.*]] = call align 4 i32* @incdec(i1 [[C]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q]]) #[[ATTR2:[0-9]+]] +; IS__TUNIT____-NEXT: [[S1:%.*]] = call { i32, i32 } @foo(i32 noundef 1, i32 noundef 2) #[[ATTR3:[0-9]+]] +; IS__TUNIT____-NEXT: [[X1:%.*]] = extractvalue { i32, i32 } [[S1]], 0 +; IS__TUNIT____-NEXT: [[S2:%.*]] = call { i32, i32 } @foo(i32 noundef 3, i32 noundef 4) #[[ATTR3]] +; IS__TUNIT____-NEXT: br label [[OK:%.*]] +; IS__TUNIT____: OK: +; IS__TUNIT____-NEXT: [[X2:%.*]] = extractvalue { i32, i32 } [[S2]], 0 +; IS__TUNIT____-NEXT: [[Z:%.*]] = add i32 [[X1]], [[X2]] +; 
IS__TUNIT____-NEXT: store i32 [[Z]], i32* [[Q]], align 4 +; IS__TUNIT____-NEXT: br label [[RET:%.*]] +; IS__TUNIT____: LPAD: +; IS__TUNIT____-NEXT: unreachable +; IS__TUNIT____: RET: +; IS__TUNIT____-NEXT: ret void ; -; IS__TUNIT_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@caller -; IS__TUNIT_NPM-SAME: (i1 [[C:%.*]]) #[[ATTR1]] personality i32 (...)* @__gxx_personality_v0 { -; IS__TUNIT_NPM-NEXT: [[Q:%.*]] = alloca i32, align 4 -; IS__TUNIT_NPM-NEXT: [[W:%.*]] = call align 4 i32* @incdec(i1 [[C]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q]]) #[[ATTR2:[0-9]+]] -; IS__TUNIT_NPM-NEXT: [[S1:%.*]] = call { i32, i32 } @foo(i32 noundef 1, i32 noundef 2) #[[ATTR3:[0-9]+]] -; IS__TUNIT_NPM-NEXT: [[X1:%.*]] = extractvalue { i32, i32 } [[S1]], 0 -; IS__TUNIT_NPM-NEXT: [[S2:%.*]] = call { i32, i32 } @foo(i32 noundef 3, i32 noundef 4) #[[ATTR3]] -; IS__TUNIT_NPM-NEXT: br label [[OK:%.*]] -; IS__TUNIT_NPM: OK: -; IS__TUNIT_NPM-NEXT: [[X2:%.*]] = extractvalue { i32, i32 } [[S2]], 0 -; IS__TUNIT_NPM-NEXT: [[Z:%.*]] = add i32 [[X1]], [[X2]] -; IS__TUNIT_NPM-NEXT: store i32 [[Z]], i32* [[Q]], align 4 -; IS__TUNIT_NPM-NEXT: br label [[RET:%.*]] -; IS__TUNIT_NPM: LPAD: -; IS__TUNIT_NPM-NEXT: unreachable -; IS__TUNIT_NPM: RET: -; IS__TUNIT_NPM-NEXT: ret void -; -; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@caller -; IS__CGSCC_OPM-SAME: (i1 [[C:%.*]]) #[[ATTR1]] personality i32 (...)* @__gxx_personality_v0 { -; IS__CGSCC_OPM-NEXT: [[Q:%.*]] = alloca i32, align 4 -; IS__CGSCC_OPM-NEXT: [[W:%.*]] = call align 4 i32* @incdec(i1 [[C]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q]]) #[[ATTR2:[0-9]+]] -; IS__CGSCC_OPM-NEXT: [[S1:%.*]] = call { i32, i32 } @foo(i32 noundef 1, i32 noundef 2) #[[ATTR3:[0-9]+]] -; IS__CGSCC_OPM-NEXT: [[X1:%.*]] 
= extractvalue { i32, i32 } [[S1]], 0 -; IS__CGSCC_OPM-NEXT: [[S2:%.*]] = call { i32, i32 } @foo(i32 noundef 3, i32 noundef 4) #[[ATTR4:[0-9]+]] -; IS__CGSCC_OPM-NEXT: br label [[OK:%.*]] -; IS__CGSCC_OPM: OK: -; IS__CGSCC_OPM-NEXT: [[X2:%.*]] = extractvalue { i32, i32 } [[S2]], 0 -; IS__CGSCC_OPM-NEXT: [[Z:%.*]] = add i32 [[X1]], [[X2]] -; IS__CGSCC_OPM-NEXT: store i32 [[Z]], i32* [[W]], align 4 -; IS__CGSCC_OPM-NEXT: br label [[RET:%.*]] -; IS__CGSCC_OPM: LPAD: -; IS__CGSCC_OPM-NEXT: unreachable -; IS__CGSCC_OPM: RET: -; IS__CGSCC_OPM-NEXT: ret void -; -; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn -; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@caller -; IS__CGSCC_NPM-SAME: (i1 [[C:%.*]]) #[[ATTR1]] personality i32 (...)* @__gxx_personality_v0 { -; IS__CGSCC_NPM-NEXT: [[Q:%.*]] = alloca i32, align 4 -; IS__CGSCC_NPM-NEXT: [[W:%.*]] = call i32* @incdec(i1 [[C]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q]]) #[[ATTR2:[0-9]+]] -; IS__CGSCC_NPM-NEXT: [[S1:%.*]] = call { i32, i32 } @foo(i32 noundef 1, i32 noundef 2) #[[ATTR3:[0-9]+]] -; IS__CGSCC_NPM-NEXT: [[X1:%.*]] = extractvalue { i32, i32 } [[S1]], 0 -; IS__CGSCC_NPM-NEXT: [[S2:%.*]] = call { i32, i32 } @foo(i32 noundef 3, i32 noundef 4) #[[ATTR4:[0-9]+]] -; IS__CGSCC_NPM-NEXT: br label [[OK:%.*]] -; IS__CGSCC_NPM: OK: -; IS__CGSCC_NPM-NEXT: [[X2:%.*]] = extractvalue { i32, i32 } [[S2]], 0 -; IS__CGSCC_NPM-NEXT: [[Z:%.*]] = add i32 [[X1]], [[X2]] -; IS__CGSCC_NPM-NEXT: store i32 [[Z]], i32* [[Q]], align 4 -; IS__CGSCC_NPM-NEXT: br label [[RET:%.*]] -; IS__CGSCC_NPM: LPAD: -; IS__CGSCC_NPM-NEXT: unreachable -; IS__CGSCC_NPM: RET: -; IS__CGSCC_NPM-NEXT: ret void +; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC____-LABEL: define {{[^@]+}}@caller +; IS__CGSCC____-SAME: (i1 [[C:%.*]]) #[[ATTR1]] personality i32 (...)* @__gxx_personality_v0 { +; IS__CGSCC____-NEXT: [[Q:%.*]] = alloca 
i32, align 4 +; IS__CGSCC____-NEXT: [[W:%.*]] = call i32* @incdec(i1 [[C]], i32* noalias nofree noundef nonnull align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q]]) #[[ATTR2:[0-9]+]] +; IS__CGSCC____-NEXT: [[S1:%.*]] = call { i32, i32 } @foo(i32 noundef 1, i32 noundef 2) #[[ATTR3:[0-9]+]] +; IS__CGSCC____-NEXT: [[X1:%.*]] = extractvalue { i32, i32 } [[S1]], 0 +; IS__CGSCC____-NEXT: [[S2:%.*]] = call { i32, i32 } @foo(i32 noundef 3, i32 noundef 4) #[[ATTR3]] +; IS__CGSCC____-NEXT: br label [[OK:%.*]] +; IS__CGSCC____: OK: +; IS__CGSCC____-NEXT: [[X2:%.*]] = extractvalue { i32, i32 } [[S2]], 0 +; IS__CGSCC____-NEXT: [[Z:%.*]] = add i32 [[X1]], [[X2]] +; IS__CGSCC____-NEXT: store i32 [[Z]], i32* [[Q]], align 4 +; IS__CGSCC____-NEXT: br label [[RET:%.*]] +; IS__CGSCC____: LPAD: +; IS__CGSCC____-NEXT: unreachable +; IS__CGSCC____: RET: +; IS__CGSCC____-NEXT: ret void ; %Q = alloca i32 ;; Call incdec to see if %W is properly replaced by %Q @@ -172,12 +134,11 @@ ;. ; IS__TUNIT____: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn } ; IS__TUNIT____: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; IS__TUNIT____: attributes #[[ATTR2:[0-9]+]] = { nofree nosync nounwind willreturn } -; IS__TUNIT____: attributes #[[ATTR3:[0-9]+]] = { nofree nosync nounwind readnone willreturn } +; IS__TUNIT____: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn } +; IS__TUNIT____: attributes #[[ATTR3]] = { nofree nosync nounwind readnone willreturn } ;. 
; IS__CGSCC____: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn } ; IS__CGSCC____: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readnone willreturn } -; IS__CGSCC____: attributes #[[ATTR2:[0-9]+]] = { nounwind willreturn } -; IS__CGSCC____: attributes #[[ATTR3:[0-9]+]] = { readnone willreturn } -; IS__CGSCC____: attributes #[[ATTR4:[0-9]+]] = { nounwind readnone willreturn } +; IS__CGSCC____: attributes #[[ATTR2]] = { nosync nounwind willreturn } +; IS__CGSCC____: attributes #[[ATTR3]] = { nosync nounwind readnone willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/align.ll b/llvm/test/Transforms/Attributor/align.ll --- a/llvm/test/Transforms/Attributor/align.ll +++ b/llvm/test/Transforms/Attributor/align.ll @@ -16,6 +16,7 @@ ; CHECK: @[[A1:[a-zA-Z0-9_$"\\.-]+]] = common global i8 0, align 8 ; CHECK: @[[A2:[a-zA-Z0-9_$"\\.-]+]] = common global i8 0, align 16 ; CHECK: @[[CND:[a-zA-Z0-9_$"\\.-]+]] = external global i1 +; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = global i8 0, align 32 ;. 
define i32* @test1(i32* align 8 %0) #0 { ; CHECK: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable @@ -1114,6 +1115,74 @@ declare void @align4_callee(i8* align(4) %p) +@G = global i8 0, align 32 + +define internal i8* @aligned_8_return(i8* %a, i1 %c1, i1 %c2) norecurse { +; NOT_CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@aligned_8_return +; NOT_CGSCC_OPM-SAME: (i8* noalias nofree readnone align 16 "no-capture-maybe-returned" [[A:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR9]] { +; NOT_CGSCC_OPM-NEXT: [[STACK:%.*]] = alloca i8*, align 8 +; NOT_CGSCC_OPM-NEXT: br i1 [[C1]], label [[T:%.*]], label [[F:%.*]] +; NOT_CGSCC_OPM: t: +; NOT_CGSCC_OPM-NEXT: [[GEP:%.*]] = getelementptr i8, i8* @G, i32 8 +; NOT_CGSCC_OPM-NEXT: [[SEL:%.*]] = select i1 [[C2]], i8* [[A]], i8* [[GEP]] +; NOT_CGSCC_OPM-NEXT: store i8* [[SEL]], i8** [[STACK]], align 8 +; NOT_CGSCC_OPM-NEXT: br label [[END:%.*]] +; NOT_CGSCC_OPM: f: +; NOT_CGSCC_OPM-NEXT: store i8* @G, i8** [[STACK]], align 8 +; NOT_CGSCC_OPM-NEXT: br label [[END]] +; NOT_CGSCC_OPM: end: +; NOT_CGSCC_OPM-NEXT: [[L:%.*]] = load i8*, i8** [[STACK]], align 8 +; NOT_CGSCC_OPM-NEXT: ret i8* [[L]] +; +; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@aligned_8_return +; IS__CGSCC_OPM-SAME: (i8* noalias nofree readnone align 16 "no-capture-maybe-returned" [[A:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR10]] { +; IS__CGSCC_OPM-NEXT: [[STACK:%.*]] = alloca i8*, align 8 +; IS__CGSCC_OPM-NEXT: br i1 [[C1]], label [[T:%.*]], label [[F:%.*]] +; IS__CGSCC_OPM: t: +; IS__CGSCC_OPM-NEXT: [[GEP:%.*]] = getelementptr i8, i8* @G, i32 8 +; IS__CGSCC_OPM-NEXT: [[SEL:%.*]] = select i1 [[C2]], i8* [[A]], i8* [[GEP]] +; IS__CGSCC_OPM-NEXT: store i8* [[SEL]], i8** [[STACK]], align 8 +; IS__CGSCC_OPM-NEXT: br label [[END:%.*]] +; IS__CGSCC_OPM: f: +; IS__CGSCC_OPM-NEXT: 
store i8* @G, i8** [[STACK]], align 8 +; IS__CGSCC_OPM-NEXT: br label [[END]] +; IS__CGSCC_OPM: end: +; IS__CGSCC_OPM-NEXT: [[L:%.*]] = load i8*, i8** [[STACK]], align 8 +; IS__CGSCC_OPM-NEXT: ret i8* [[L]] +; + %stack = alloca i8* + br i1 %c1, label %t, label %f +t: + %gep = getelementptr i8, i8* @G, i32 8 + %sel = select i1 %c2, i8* %a, i8* %gep + store i8* %sel, i8** %stack + br label %end +f: + store i8* @G, i8** %stack + br label %end +end: + %l = load i8*, i8** %stack + ret i8* %l +} + +define i8* @aligned_8_return_caller(i8* align(16) %a, i1 %c1, i1 %c2) { +; NOT_CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; NOT_CGSCC_OPM-LABEL: define {{[^@]+}}@aligned_8_return_caller +; NOT_CGSCC_OPM-SAME: (i8* nofree readnone align 16 "no-capture-maybe-returned" [[A:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR9]] { +; NOT_CGSCC_OPM-NEXT: [[R:%.*]] = call align 8 i8* @aligned_8_return(i8* noalias nofree readnone align 16 "no-capture-maybe-returned" [[A]], i1 [[C1]], i1 [[C2]]) #[[ATTR12:[0-9]+]] +; NOT_CGSCC_OPM-NEXT: ret i8* [[R]] +; +; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@aligned_8_return_caller +; IS__CGSCC_OPM-SAME: (i8* nofree readnone align 16 "no-capture-maybe-returned" [[A:%.*]], i1 [[C1:%.*]], i1 [[C2:%.*]]) #[[ATTR10]] { +; IS__CGSCC_OPM-NEXT: [[R:%.*]] = call align 8 i8* @aligned_8_return(i8* noalias nofree readnone align 16 "no-capture-maybe-returned" [[A]], i1 [[C1]], i1 [[C2]]) #[[ATTR13:[0-9]+]] +; IS__CGSCC_OPM-NEXT: ret i8* [[R]] +; + %r = call i8* @aligned_8_return(i8* %a, i1 %c1, i1 %c2) + ret i8* %r +} attributes #0 = { nounwind uwtable noinline } attributes #1 = { uwtable noinline } @@ -1131,6 +1200,7 @@ ; IS__TUNIT____: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind readnone willreturn } ; IS__TUNIT____: attributes #[[ATTR10]] = { nofree norecurse nosync nounwind readonly willreturn } ; IS__TUNIT____: attributes 
#[[ATTR11]] = { nofree nosync nounwind readonly willreturn } +; IS__TUNIT____: attributes #[[ATTR12]] = { nofree nosync nounwind readnone willreturn } ;. ; IS__CGSCC_OPM: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } ; IS__CGSCC_OPM: attributes #[[ATTR1]] = { nofree noinline nosync nounwind readnone willreturn uwtable } @@ -1145,6 +1215,7 @@ ; IS__CGSCC_OPM: attributes #[[ATTR10]] = { nofree norecurse nosync nounwind readnone willreturn } ; IS__CGSCC_OPM: attributes #[[ATTR11]] = { nofree norecurse nosync nounwind readonly willreturn } ; IS__CGSCC_OPM: attributes #[[ATTR12]] = { readonly willreturn } +; IS__CGSCC_OPM: attributes #[[ATTR13]] = { readnone willreturn } ;. ; IS__CGSCC_NPM: attributes #[[ATTR0]] = { nofree noinline norecurse nosync nounwind readnone willreturn uwtable } ; IS__CGSCC_NPM: attributes #[[ATTR1]] = { noinline norecurse nounwind uwtable } @@ -1158,4 +1229,5 @@ ; IS__CGSCC_NPM: attributes #[[ATTR9]] = { nofree norecurse nosync nounwind readnone willreturn } ; IS__CGSCC_NPM: attributes #[[ATTR10]] = { nofree norecurse nosync nounwind readonly willreturn } ; IS__CGSCC_NPM: attributes #[[ATTR11]] = { readonly willreturn } +; IS__CGSCC_NPM: attributes #[[ATTR12]] = { readnone willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/dereferenceable-1.ll b/llvm/test/Transforms/Attributor/dereferenceable-1.ll --- a/llvm/test/Transforms/Attributor/dereferenceable-1.ll +++ b/llvm/test/Transforms/Attributor/dereferenceable-1.ll @@ -10,6 +10,9 @@ ; TEST 1 ; take mininimum of return values ; +;. +; CHECK: @[[G:[a-zA-Z0-9_$"\\.-]+]] = global i64 0 +;. 
define i32* @test1(i32* dereferenceable(4) %0, double* dereferenceable(8) %1, i1 zeroext %2) local_unnamed_addr { ; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; CHECK-LABEL: define {{[^@]+}}@test1 @@ -316,7 +319,7 @@ define void @test8(i8* %ptr) #0 { ; IS________OPM: Function Attrs: argmemonly nofree norecurse nosync nounwind writeonly ; IS________OPM-LABEL: define {{[^@]+}}@test8 -; IS________OPM-SAME: (i8* nocapture nofree nonnull writeonly [[PTR:%.*]]) #[[ATTR3:[0-9]+]] { +; IS________OPM-SAME: (i8* nocapture nofree writeonly [[PTR:%.*]]) #[[ATTR3:[0-9]+]] { ; IS________OPM-NEXT: br label [[TMP1:%.*]] ; IS________OPM: 1: ; IS________OPM-NEXT: [[I_0:%.*]] = phi i32 [ 20, [[TMP0:%.*]] ], [ [[TMP4:%.*]], [[TMP5:%.*]] ] @@ -915,6 +918,32 @@ declare void @unknown_use32(i32*) willreturn nounwind declare void @llvm.assume(i1) +@g = global i64 0 +define void @max_offset(i1 %c) { +; CHECK: Function Attrs: nounwind willreturn +; CHECK-LABEL: define {{[^@]+}}@max_offset +; CHECK-SAME: (i1 [[C:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; CHECK: t: +; CHECK-NEXT: br label [[F]] +; CHECK: f: +; CHECK-NEXT: [[PHI:%.*]] = phi i8* [ getelementptr (i8, i8* bitcast (i64* @g to i8*), i64 2), [[T]] ], [ bitcast (i64* @g to i8*), [[ENTRY:%.*]] ] +; CHECK-NEXT: call void @unknown_use8(i8* noundef align 2 dereferenceable_or_null(6) [[PHI]]) #[[ATTR1]] +; CHECK-NEXT: ret void +; +entry: + %bc = bitcast i64* @g to i8* + br i1 %c, label %t, label %f +t: + %gep = getelementptr i8, i8* %bc, i64 2 + br label %f +f: + %phi = phi i8* [%gep, %t], [%bc, %entry] + call void @unknown_use8(i8* %phi) + ret void +} + !0 = !{i64 10, i64 100} ;. 
diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll --- a/llvm/test/Transforms/Attributor/heap_to_stack.ll +++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll @@ -220,12 +220,21 @@ ; leave alone a constant-but-invalid alignment define void @test3d(i8* %p) { -; CHECK-LABEL: define {{[^@]+}}@test3d -; CHECK-SAME; (i8* nocapture [[P:%.*]]) { -; CHECK-NEXT: [[TMP1:%.*]] = tail call noalias i8* @aligned_alloc(i64 noundef 33, i64 noundef 128) -; CHECK: tail call void @free(i8* noalias nocapture [[TMP1]]) -; CHECK-NEXT: ret void +; IS________OPM-LABEL: define {{[^@]+}}@test3d +; IS________OPM-SAME: (i8* nocapture [[P:%.*]]) { +; IS________OPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @aligned_alloc(i64 noundef 33, i64 noundef 128) +; IS________OPM-NEXT: tail call void @nofree_arg_only(i8* nocapture nofree [[TMP1]], i8* nocapture [[P]]) +; IS________OPM-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) +; IS________OPM-NEXT: ret void ; +; IS________NPM-LABEL: define {{[^@]+}}@test3d +; IS________NPM-SAME: (i8* nocapture [[P:%.*]]) { +; IS________NPM-NEXT: [[TMP1:%.*]] = tail call noalias i8* @aligned_alloc(i64 noundef 33, i64 noundef 128) +; IS________NPM-NEXT: tail call void @nofree_arg_only(i8* noalias nocapture nofree [[TMP1]], i8* nocapture [[P]]) +; IS________NPM-NEXT: tail call void @free(i8* noalias nocapture [[TMP1]]) +; IS________NPM-NEXT: ret void +; +; CHECK-SAME; (i8* nocapture [[P:%.*]]) { %1 = tail call noalias i8* @aligned_alloc(i64 33, i64 128) tail call void @nofree_arg_only(i8* %1, i8* %p) tail call void @free(i8* %1) @@ -578,8 +587,9 @@ ; IS________NPM-NEXT: [[TMP14]] = add nsw i32 [[DOT1]], 1 ; IS________NPM-NEXT: br label [[TMP8]] ; IS________NPM: 15: -; IS________NPM-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP3]], align 4 -; IS________NPM-NEXT: ret i32 [[TMP16]] +; IS________NPM-NEXT: [[TMP16:%.*]] = bitcast i32* [[TMP3]] to i8* +; IS________NPM-NEXT: [[TMP17:%.*]] = load i32, 
i32* [[TMP3]], align 4 +; IS________NPM-NEXT: ret i32 [[TMP17]] ; %2 = call noalias i8* @malloc(i64 4) %3 = bitcast i8* %2 to i32* diff --git a/llvm/test/Transforms/Attributor/internal-noalias.ll b/llvm/test/Transforms/Attributor/internal-noalias.ll --- a/llvm/test/Transforms/Attributor/internal-noalias.ll +++ b/llvm/test/Transforms/Attributor/internal-noalias.ll @@ -144,12 +144,17 @@ } define internal i32 @noalias_args_argmem_rn(i32* %A, i32* %B) #1 { -; CHECK: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn uwtable -; CHECK-LABEL: define {{[^@]+}}@noalias_args_argmem_rn -; CHECK-SAME: (i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: [[T0:%.*]] = load i32, i32* [[B]], align 4 -; CHECK-NEXT: store i32 0, i32* [[B]], align 4 -; CHECK-NEXT: ret i32 [[T0]] +; IS__TUNIT____: Function Attrs: argmemonly nofree noinline norecurse nosync nounwind willreturn uwtable +; IS__TUNIT____-LABEL: define {{[^@]+}}@noalias_args_argmem_rn +; IS__TUNIT____-SAME: (i32* noalias nocapture nofree noundef nonnull align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR1]] { +; IS__TUNIT____-NEXT: [[T0:%.*]] = load i32, i32* [[B]], align 4 +; IS__TUNIT____-NEXT: ret i32 [[T0]] +; +; IS__CGSCC____: Function Attrs: nofree noinline norecurse nosync nounwind readnone willreturn uwtable +; IS__CGSCC____-LABEL: define {{[^@]+}}@noalias_args_argmem_rn +; IS__CGSCC____-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR2]] { +; IS__CGSCC____-NEXT: [[T0:%.*]] = load i32, i32* undef, align 4 +; IS__CGSCC____-NEXT: ret i32 undef ; %t0 = load i32, i32* %B, align 4 store i32 0, i32* %B @@ -170,8 +175,7 @@ ; IS__CGSCC____-SAME: () #[[ATTR3]] { ; IS__CGSCC____-NEXT: [[B:%.*]] = alloca i32, align 4 ; IS__CGSCC____-NEXT: store i32 5, i32* [[B]], align 4 -; IS__CGSCC____-NEXT: [[CALL:%.*]] = call i32 @noalias_args_argmem_rn(i32* noalias nocapture nofree noundef nonnull 
align 4 dereferenceable(4) [[B]]) #[[ATTR6:[0-9]+]] -; IS__CGSCC____-NEXT: ret i32 [[CALL]] +; IS__CGSCC____-NEXT: ret i32 5 ; %B = alloca i32, align 4 store i32 5, i32* %B, align 4 @@ -194,5 +198,4 @@ ; IS__CGSCC____: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind readnone willreturn } ; IS__CGSCC____: attributes #[[ATTR4]] = { nounwind readonly } ; IS__CGSCC____: attributes #[[ATTR5]] = { nosync nounwind readonly } -; IS__CGSCC____: attributes #[[ATTR6]] = { nounwind willreturn } ;. diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll --- a/llvm/test/Transforms/Attributor/liveness.ll +++ b/llvm/test/Transforms/Attributor/liveness.ll @@ -2281,7 +2281,7 @@ call void %fp(i32* %a, i32* %b, i32* %a, i64 -1, i32** null) ret void } -; FIXME: We have to prevent the propagation of %fp in the new pm CGSCC pass until the CallGraphUpdater can handle the new call edge. + define internal void @call_via_pointer_with_dead_args_internal_a(i32* %a, i32* %b, void (i32*, i32*, i32*, i64, i32**)* %fp) { ; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@call_via_pointer_with_dead_args_internal_a ; NOT_CGSCC_NPM-SAME: (i32* [[A:%.*]], i32* noundef nonnull align 128 dereferenceable(4) [[B:%.*]]) { @@ -2289,8 +2289,8 @@ ; NOT_CGSCC_NPM-NEXT: ret void ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@call_via_pointer_with_dead_args_internal_a -; IS__CGSCC____-SAME: (i32* [[A:%.*]], i32* noundef nonnull align 128 dereferenceable(4) [[B:%.*]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull [[FP:%.*]]) { -; IS__CGSCC____-NEXT: call void [[FP]](i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[B]], i32* [[A]], i64 -1, i32** null) +; IS__CGSCC____-SAME: (i32* [[A:%.*]], i32* noalias nocapture nofree nonnull readnone align 128 dereferenceable(4) [[B:%.*]]) { +; IS__CGSCC____-NEXT: call void @called_via_pointer(i32* [[A]], i32* noalias nocapture nofree nonnull readnone align 128 dereferenceable(4) undef, i32* noalias 
nocapture nofree readnone undef, i64 undef, i32** noalias nocapture nofree readnone align 4294967296 undef) ; IS__CGSCC____-NEXT: ret void ; call void %fp(i32* %a, i32* %b, i32* %a, i64 -1, i32** null) @@ -2303,8 +2303,8 @@ ; NOT_CGSCC_NPM-NEXT: ret void ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@call_via_pointer_with_dead_args_internal_b -; IS__CGSCC____-SAME: (i32* [[A:%.*]], i32* noundef nonnull align 128 dereferenceable(4) [[B:%.*]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull [[FP:%.*]]) { -; IS__CGSCC____-NEXT: call void [[FP]](i32* [[A]], i32* nonnull align 128 dereferenceable(4) [[B]], i32* [[A]], i64 -1, i32** null) +; IS__CGSCC____-SAME: (i32* [[A:%.*]], i32* noalias nocapture nofree nonnull readnone align 128 dereferenceable(4) [[B:%.*]]) { +; IS__CGSCC____-NEXT: call void @called_via_pointer_internal_2(i32* [[A]]) ; IS__CGSCC____-NEXT: ret void ; call void %fp(i32* %a, i32* %b, i32* %a, i64 -1, i32** null) @@ -2327,12 +2327,10 @@ ; IS__CGSCC____-SAME: (i32* [[A:%.*]], i32* [[B:%.*]]) { ; IS__CGSCC____-NEXT: [[PTR1:%.*]] = alloca i32, align 128 ; IS__CGSCC____-NEXT: [[PTR2:%.*]] = alloca i32, align 128 -; IS__CGSCC____-NEXT: [[PTR3:%.*]] = alloca i32, align 128 -; IS__CGSCC____-NEXT: [[PTR4:%.*]] = alloca i32, align 128 ; IS__CGSCC____-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR1]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull @called_via_pointer) ; IS__CGSCC____-NEXT: call void @call_via_pointer_with_dead_args(i32* [[A]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR2]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull @called_via_pointer_internal_1) -; IS__CGSCC____-NEXT: call void @call_via_pointer_with_dead_args_internal_a(i32* [[B]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR3]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull @called_via_pointer) -; 
IS__CGSCC____-NEXT: call void @call_via_pointer_with_dead_args_internal_b(i32* [[B]], i32* noundef nonnull align 128 dereferenceable(4) [[PTR4]], void (i32*, i32*, i32*, i64, i32**)* nocapture nofree noundef nonnull @called_via_pointer_internal_2) +; IS__CGSCC____-NEXT: call void @call_via_pointer_with_dead_args_internal_a(i32* [[B]], i32* noalias nocapture nofree nonnull readnone align 128 dereferenceable(4) undef) +; IS__CGSCC____-NEXT: call void @call_via_pointer_with_dead_args_internal_b(i32* [[B]], i32* noalias nocapture nofree nonnull readnone align 128 dereferenceable(4) undef) ; IS__CGSCC____-NEXT: ret void ; %ptr1 = alloca i32, align 128 @@ -2373,12 +2371,19 @@ } ; FIXME: Figure out why the MODULE has the unused arguments still define internal void @called_via_pointer_internal_2(i32* %a, i32* %b, i32* %c, i64 %d, i32** %e) { -; CHECK-LABEL: define {{[^@]+}}@called_via_pointer_internal_2 -; CHECK-SAME: (i32* [[A:%.*]], i32* nocapture nofree readnone [[B:%.*]], i32* nocapture nofree readnone [[C:%.*]], i64 [[D:%.*]], i32** nocapture nofree readnone [[E:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: tail call void @use_i32p(i32* [[A]]) -; CHECK-NEXT: tail call void @use_i32p(i32* [[A]]) -; CHECK-NEXT: ret void +; NOT_CGSCC_NPM-LABEL: define {{[^@]+}}@called_via_pointer_internal_2 +; NOT_CGSCC_NPM-SAME: (i32* [[A:%.*]], i32* nocapture nofree readnone [[B:%.*]], i32* nocapture nofree readnone [[C:%.*]], i64 [[D:%.*]], i32** nocapture nofree readnone [[E:%.*]]) { +; NOT_CGSCC_NPM-NEXT: entry: +; NOT_CGSCC_NPM-NEXT: tail call void @use_i32p(i32* [[A]]) +; NOT_CGSCC_NPM-NEXT: tail call void @use_i32p(i32* [[A]]) +; NOT_CGSCC_NPM-NEXT: ret void +; +; IS__CGSCC____-LABEL: define {{[^@]+}}@called_via_pointer_internal_2 +; IS__CGSCC____-SAME: (i32* [[A:%.*]]) { +; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: tail call void @use_i32p(i32* [[A]]) +; IS__CGSCC____-NEXT: tail call void @use_i32p(i32* [[A]]) +; IS__CGSCC____-NEXT: ret void ; entry: tail call void 
@use_i32p(i32* %a) diff --git a/llvm/test/Transforms/Attributor/noalias.ll b/llvm/test/Transforms/Attributor/noalias.ll --- a/llvm/test/Transforms/Attributor/noalias.ll +++ b/llvm/test/Transforms/Attributor/noalias.ll @@ -402,23 +402,23 @@ ; IS________OPM-LABEL: define {{[^@]+}}@test12_4() { ; IS________OPM-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) ; IS________OPM-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) -; IS________OPM-NEXT: [[A_0:%.*]] = getelementptr i8, i8* [[A]], i64 0 ; IS________OPM-NEXT: [[A_1:%.*]] = getelementptr i8, i8* [[A]], i64 1 -; IS________OPM-NEXT: [[B_0:%.*]] = getelementptr i8, i8* [[B]], i64 0 ; IS________OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[B]]) -; IS________OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_0]]) +; IS________OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A]]) ; IS________OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_1]]) -; IS________OPM-NEXT: tail call void @two_args(i8* nocapture [[A_0]], i8* nocapture [[B_0]]) +; IS________OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[B]]) ; IS________OPM-NEXT: ret void ; ; NOT_TUNIT_OPM-LABEL: define {{[^@]+}}@test12_4() { ; NOT_TUNIT_OPM-NEXT: [[A:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) ; NOT_TUNIT_OPM-NEXT: [[B:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) +; NOT_TUNIT_OPM-NEXT: [[A_0:%.*]] = getelementptr i8, i8* [[A]], i64 0 ; NOT_TUNIT_OPM-NEXT: [[A_1:%.*]] = getelementptr i8, i8* [[A]], i64 1 +; NOT_TUNIT_OPM-NEXT: [[B_0:%.*]] = getelementptr i8, i8* [[B]], i64 0 ; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* noalias nocapture [[A]], i8* noalias nocapture [[B]]) -; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A]]) +; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[A_0]]) ; NOT_TUNIT_OPM-NEXT: tail call void 
@two_args(i8* nocapture [[A]], i8* nocapture [[A_1]]) -; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* nocapture [[A]], i8* nocapture [[B]]) +; NOT_TUNIT_OPM-NEXT: tail call void @two_args(i8* nocapture [[A_0]], i8* nocapture [[B_0]]) ; NOT_TUNIT_OPM-NEXT: ret void ; %A = tail call noalias i8* @malloc(i64 4) @@ -452,14 +452,14 @@ define void @test13_use_noalias(){ ; IS________OPM-LABEL: define {{[^@]+}}@test13_use_noalias() { ; IS________OPM-NEXT: [[M1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) -; IS________OPM-NEXT: [[C1:%.*]] = bitcast i8* [[M1]] to i16* -; IS________OPM-NEXT: [[C2:%.*]] = bitcast i16* [[C1]] to i8* -; IS________OPM-NEXT: call void @use_i8_internal(i8* noalias nocapture [[C2]]) +; IS________OPM-NEXT: call void @use_i8_internal(i8* noalias nocapture [[M1]]) ; IS________OPM-NEXT: ret void ; ; NOT_TUNIT_OPM-LABEL: define {{[^@]+}}@test13_use_noalias() { ; NOT_TUNIT_OPM-NEXT: [[M1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) -; NOT_TUNIT_OPM-NEXT: call void @use_i8_internal(i8* noalias nocapture [[M1]]) +; NOT_TUNIT_OPM-NEXT: [[C1:%.*]] = bitcast i8* [[M1]] to i16* +; NOT_TUNIT_OPM-NEXT: [[C2:%.*]] = bitcast i16* [[C1]] to i8* +; NOT_TUNIT_OPM-NEXT: call void @use_i8_internal(i8* noalias nocapture [[C2]]) ; NOT_TUNIT_OPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test13_use_noalias() @@ -478,17 +478,17 @@ define void @test13_use_alias(){ ; IS________OPM-LABEL: define {{[^@]+}}@test13_use_alias() { ; IS________OPM-NEXT: [[M1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) -; IS________OPM-NEXT: [[C1:%.*]] = bitcast i8* [[M1]] to i16* -; IS________OPM-NEXT: [[C2A:%.*]] = bitcast i16* [[C1]] to i8* -; IS________OPM-NEXT: [[C2B:%.*]] = bitcast i16* [[C1]] to i8* -; IS________OPM-NEXT: call void @use_i8_internal(i8* nocapture [[C2A]]) -; IS________OPM-NEXT: call void @use_i8_internal(i8* nocapture [[C2B]]) +; IS________OPM-NEXT: call void @use_i8_internal(i8* nocapture [[M1]]) +; IS________OPM-NEXT: call void 
@use_i8_internal(i8* nocapture [[M1]]) ; IS________OPM-NEXT: ret void ; ; NOT_TUNIT_OPM-LABEL: define {{[^@]+}}@test13_use_alias() { ; NOT_TUNIT_OPM-NEXT: [[M1:%.*]] = tail call noalias i8* @malloc(i64 noundef 4) -; NOT_TUNIT_OPM-NEXT: call void @use_i8_internal(i8* nocapture [[M1]]) -; NOT_TUNIT_OPM-NEXT: call void @use_i8_internal(i8* nocapture [[M1]]) +; NOT_TUNIT_OPM-NEXT: [[C1:%.*]] = bitcast i8* [[M1]] to i16* +; NOT_TUNIT_OPM-NEXT: [[C2A:%.*]] = bitcast i16* [[C1]] to i8* +; NOT_TUNIT_OPM-NEXT: [[C2B:%.*]] = bitcast i16* [[C1]] to i8* +; NOT_TUNIT_OPM-NEXT: call void @use_i8_internal(i8* nocapture [[C2A]]) +; NOT_TUNIT_OPM-NEXT: call void @use_i8_internal(i8* nocapture [[C2B]]) ; NOT_TUNIT_OPM-NEXT: ret void ; %m1 = tail call noalias i8* @malloc(i64 4) diff --git a/llvm/test/Transforms/Attributor/nodelete.ll b/llvm/test/Transforms/Attributor/nodelete.ll --- a/llvm/test/Transforms/Attributor/nodelete.ll +++ b/llvm/test/Transforms/Attributor/nodelete.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals -; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=2 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s 
--check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM @@ -32,6 +32,7 @@ ; IS__CGSCC____-LABEL: define {{[^@]+}}@f2 ; IS__CGSCC____-SAME: () #[[ATTR0]] align 2 { ; IS__CGSCC____-NEXT: entry: +; IS__CGSCC____-NEXT: [[TMP0:%.*]] = bitcast %a* undef to %b* ; IS__CGSCC____-NEXT: ret i64 undef ; entry: diff --git a/llvm/test/Transforms/Attributor/nonnull.ll b/llvm/test/Transforms/Attributor/nonnull.ll --- a/llvm/test/Transforms/Attributor/nonnull.ll +++ b/llvm/test/Transforms/Attributor/nonnull.ll @@ -257,7 +257,7 @@ ; CHECK-NEXT: [[PHI:%.*]] = phi i8* [ [[RET]], [[ENTRY:%.*]] ], [ [[PHI]], [[LOOP]] ] ; CHECK-NEXT: br i1 [[C]], label [[LOOP]], label [[EXIT:%.*]] ; CHECK: exit: -; CHECK-NEXT: ret i8* [[PHI]] +; CHECK-NEXT: ret i8* [[RET]] ; entry: %ret = call i8* @ret_nonnull() diff --git a/llvm/test/Transforms/Attributor/range.ll b/llvm/test/Transforms/Attributor/range.ll --- a/llvm/test/Transforms/Attributor/range.ll +++ b/llvm/test/Transforms/Attributor/range.ll @@ -967,8 +967,7 @@ ; IS__TUNIT_OPM-NEXT: [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR5]], !range [[RNG4]] ; IS__TUNIT_OPM-NEXT: [[A:%.*]] = add i32 [[R1]], [[R2]] ; 
IS__TUNIT_OPM-NEXT: [[I1:%.*]] = icmp sle i32 [[A]], 3 -; IS__TUNIT_OPM-NEXT: [[F:%.*]] = and i1 [[I1]], true -; IS__TUNIT_OPM-NEXT: ret i1 [[F]] +; IS__TUNIT_OPM-NEXT: ret i1 [[I1]] ; ; IS________NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn ; IS________NPM-LABEL: define {{[^@]+}}@callee_range_2 @@ -986,8 +985,7 @@ ; IS__CGSCC_OPM-NEXT: [[R2:%.*]] = call i32 @ret1or2(i1 [[C2]]) #[[ATTR5]], !range [[RNG5]] ; IS__CGSCC_OPM-NEXT: [[A:%.*]] = add i32 [[R1]], [[R2]] ; IS__CGSCC_OPM-NEXT: [[I1:%.*]] = icmp sle i32 [[A]], 3 -; IS__CGSCC_OPM-NEXT: [[F:%.*]] = and i1 [[I1]], true -; IS__CGSCC_OPM-NEXT: ret i1 [[F]] +; IS__CGSCC_OPM-NEXT: ret i1 [[I1]] ; %r1 = call i32 @ret1or2(i1 %c1) %r2 = call i32 @ret1or2(i1 %c2) diff --git a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll --- a/llvm/test/Transforms/Attributor/value-simplify-gpu.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-gpu.ll @@ -69,7 +69,6 @@ ; IS__CGSCC____-NEXT: call void @level2Kernelb() #[[ATTR4]] ; IS__CGSCC____-NEXT: br label [[IF_END]] ; IS__CGSCC____: if.end: -; IS__CGSCC____-NEXT: call void @level2Kernelall_late() #[[ATTR6:[0-9]+]] ; IS__CGSCC____-NEXT: ret void ; entry: @@ -112,7 +111,7 @@ ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4 ; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4 -; IS__TUNIT____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 noundef 42) #[[ATTR6:[0-9]+]] +; IS__TUNIT____-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR6:[0-9]+]] ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: norecurse nosync nounwind @@ -122,7 +121,7 @@ ; IS__CGSCC____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4 ; IS__CGSCC____-NEXT: [[TMP1:%.*]] = load i32, i32* 
@ReachableKernelAS0, align 4 ; IS__CGSCC____-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 -; IS__CGSCC____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 noundef 42) #[[ATTR4]] +; IS__CGSCC____-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR4]] ; IS__CGSCC____-NEXT: ret void ; entry: @@ -140,7 +139,7 @@ ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4 ; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4 -; IS__TUNIT____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 noundef 42) #[[ATTR6]] +; IS__TUNIT____-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR6]] ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC____: Function Attrs: norecurse nosync nounwind @@ -150,7 +149,7 @@ ; IS__CGSCC____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableKernel to i32*), align 4 ; IS__CGSCC____-NEXT: [[TMP1:%.*]] = load i32, i32* @ReachableKernelAS0, align 4 ; IS__CGSCC____-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableKernel to i32*), align 4 -; IS__CGSCC____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 noundef 42) #[[ATTR4]] +; IS__CGSCC____-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 noundef 42) #[[ATTR4]] ; IS__CGSCC____-NEXT: ret void ; entry: @@ -236,7 +235,7 @@ ; IS__CGSCC_OPM-NEXT: call void @level2b(i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) undef) #[[ATTR4]] ; IS__CGSCC_OPM-NEXT: br label [[IF_END]] ; IS__CGSCC_OPM: if.end: -; IS__CGSCC_OPM-NEXT: call void @level2all_late(i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) undef) #[[ATTR7:[0-9]+]] +; IS__CGSCC_OPM-NEXT: call void @level2all_late(i32* noalias nocapture nofree nonnull readnone align 4 
dereferenceable(4) undef) #[[ATTR6:[0-9]+]] ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM: Function Attrs: norecurse nosync nounwind @@ -251,10 +250,10 @@ ; IS__CGSCC_NPM-NEXT: call void @level2a(i32 undef) #[[ATTR4]] ; IS__CGSCC_NPM-NEXT: br label [[IF_END:%.*]] ; IS__CGSCC_NPM: if.else: -; IS__CGSCC_NPM-NEXT: call void @level2b(i32 undef) #[[ATTR7:[0-9]+]] +; IS__CGSCC_NPM-NEXT: call void @level2b(i32 undef) #[[ATTR6:[0-9]+]] ; IS__CGSCC_NPM-NEXT: br label [[IF_END]] ; IS__CGSCC_NPM: if.end: -; IS__CGSCC_NPM-NEXT: call void @level2all_late(i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) undef) #[[ATTR8:[0-9]+]] +; IS__CGSCC_NPM-NEXT: call void @level2all_late(i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) undef) #[[ATTR7:[0-9]+]] ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -304,17 +303,17 @@ ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 ; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; IS__TUNIT____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 17) #[[ATTR6]] +; IS__TUNIT____-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR6]] ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC_OPM: Function Attrs: norecurse nosync nounwind ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@level2a -; IS__CGSCC_OPM-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR1]] { +; IS__CGSCC_OPM-SAME: (i32* noalias nocapture nofree nonnull readnone align 4294967296 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR1]] { ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 ; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 
-; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = load i32, i32* undef, align 4 -; IS__CGSCC_OPM-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 17) #[[ATTR4]] +; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = load i32, i32* undef, align 4294967296 +; IS__CGSCC_OPM-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR4]] ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM: Function Attrs: norecurse nosync nounwind @@ -325,7 +324,7 @@ ; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[ADDR_PRIV]], align 4 -; IS__CGSCC_NPM-NEXT: call void @use(i32 [[TMP1]], i32 [[TMP2]], i32 17) #[[ATTR4]] +; IS__CGSCC_NPM-NEXT: call void @use(i32 noundef [[TMP1]], i32 noundef [[TMP2]], i32 17) #[[ATTR4]] ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -343,17 +342,17 @@ ; IS__TUNIT____-NEXT: entry: ; IS__TUNIT____-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 ; IS__TUNIT____-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; IS__TUNIT____-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 17) #[[ATTR6]] +; IS__TUNIT____-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR6]] ; IS__TUNIT____-NEXT: ret void ; ; IS__CGSCC_OPM: Function Attrs: norecurse nosync nounwind ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@level2b -; IS__CGSCC_OPM-SAME: (i32* noalias nocapture nofree nonnull readnone align 4 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR1]] { +; IS__CGSCC_OPM-SAME: (i32* noalias nocapture nofree nonnull readnone align 4294967296 dereferenceable(4) [[ADDR:%.*]]) #[[ATTR1]] { ; IS__CGSCC_OPM-NEXT: entry: ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* 
@ReachableNonKernel to i32*), align 4 ; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 -; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = load i32, i32* undef, align 4 -; IS__CGSCC_OPM-NEXT: call void @use(i32 [[TMP0]], i32 [[TMP1]], i32 17) #[[ATTR4]] +; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = load i32, i32* undef, align 4294967296 +; IS__CGSCC_OPM-NEXT: call void @use(i32 noundef [[TMP0]], i32 noundef [[TMP1]], i32 17) #[[ATTR4]] ; IS__CGSCC_OPM-NEXT: ret void ; ; IS__CGSCC_NPM: Function Attrs: norecurse nosync nounwind @@ -364,7 +363,7 @@ ; IS__CGSCC_NPM-NEXT: [[TMP1:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @ReachableNonKernel to i32*), align 4 ; IS__CGSCC_NPM-NEXT: [[TMP2:%.*]] = load i32, i32* addrspacecast (i32 addrspace(3)* @UnreachableNonKernel to i32*), align 4 ; IS__CGSCC_NPM-NEXT: [[TMP3:%.*]] = load i32, i32* [[ADDR_PRIV]], align 4 -; IS__CGSCC_NPM-NEXT: call void @use(i32 [[TMP1]], i32 [[TMP2]], i32 17) #[[ATTR4]] +; IS__CGSCC_NPM-NEXT: call void @use(i32 noundef [[TMP1]], i32 noundef [[TMP2]], i32 17) #[[ATTR4]] ; IS__CGSCC_NPM-NEXT: ret void ; entry: @@ -413,8 +412,7 @@ ; IS__CGSCC_OPM: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind readnone willreturn } ; IS__CGSCC_OPM: attributes #[[ATTR4]] = { nounwind } ; IS__CGSCC_OPM: attributes #[[ATTR5]] = { nounwind willreturn writeonly } -; IS__CGSCC_OPM: attributes #[[ATTR6]] = { nounwind readnone } -; IS__CGSCC_OPM: attributes #[[ATTR7]] = { nounwind writeonly } +; IS__CGSCC_OPM: attributes #[[ATTR6]] = { nounwind writeonly } ;. 
; IS__CGSCC_NPM: attributes #[[ATTR0]] = { norecurse nosync nounwind "kernel" } ; IS__CGSCC_NPM: attributes #[[ATTR1]] = { norecurse nosync nounwind } @@ -422,7 +420,6 @@ ; IS__CGSCC_NPM: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind readnone willreturn } ; IS__CGSCC_NPM: attributes #[[ATTR4]] = { nounwind } ; IS__CGSCC_NPM: attributes #[[ATTR5]] = { nounwind willreturn writeonly } -; IS__CGSCC_NPM: attributes #[[ATTR6]] = { nounwind readnone } -; IS__CGSCC_NPM: attributes #[[ATTR7]] = { nosync nounwind } -; IS__CGSCC_NPM: attributes #[[ATTR8]] = { nosync nounwind writeonly } +; IS__CGSCC_NPM: attributes #[[ATTR6]] = { nosync nounwind } +; IS__CGSCC_NPM: attributes #[[ATTR7]] = { nosync nounwind writeonly } ;. diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll --- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll +++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll @@ -3183,8 +3183,7 @@ ; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* ; IS__TUNIT_OPM-NEXT: store i32 [[X]], i32* [[TMP0]], align 4 ; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP0]] to i8* -; IS__TUNIT_OPM-NEXT: call void @free(i8* noundef [[TMP2]]) #[[ATTR17]] +; IS__TUNIT_OPM-NEXT: call void @free(i8* noundef [[CALL]]) #[[ATTR17]] ; IS__TUNIT_OPM-NEXT: ret i32 [[TMP1]] ; ; IS________NPM-LABEL: define {{[^@]+}}@round_trip_malloc @@ -3203,8 +3202,7 @@ ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* ; IS__CGSCC_OPM-NEXT: store i32 [[X]], i32* [[TMP0]], align 4 ; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP0]] to i8* -; IS__CGSCC_OPM-NEXT: call void @free(i8* noundef [[TMP2]]) #[[ATTR16]] +; IS__CGSCC_OPM-NEXT: call void @free(i8* noundef [[CALL]]) #[[ATTR16]] ; IS__CGSCC_OPM-NEXT: 
ret i32 [[TMP1]] ; entry: @@ -3224,8 +3222,7 @@ ; IS__TUNIT_OPM-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* ; IS__TUNIT_OPM-NEXT: store i32 7, i32* [[TMP0]], align 4 ; IS__TUNIT_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; IS__TUNIT_OPM-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP0]] to i8* -; IS__TUNIT_OPM-NEXT: call void @free(i8* noundef [[TMP2]]) #[[ATTR17]] +; IS__TUNIT_OPM-NEXT: call void @free(i8* noundef [[CALL]]) #[[ATTR17]] ; IS__TUNIT_OPM-NEXT: ret i32 [[TMP1]] ; ; IS________NPM-LABEL: define {{[^@]+}}@round_trip_malloc_constant() { @@ -3238,8 +3235,7 @@ ; IS__CGSCC_OPM-NEXT: [[TMP0:%.*]] = bitcast i8* [[CALL]] to i32* ; IS__CGSCC_OPM-NEXT: store i32 7, i32* [[TMP0]], align 4 ; IS__CGSCC_OPM-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -; IS__CGSCC_OPM-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP0]] to i8* -; IS__CGSCC_OPM-NEXT: call void @free(i8* noundef [[TMP2]]) #[[ATTR16]] +; IS__CGSCC_OPM-NEXT: call void @free(i8* noundef [[CALL]]) #[[ATTR16]] ; IS__CGSCC_OPM-NEXT: ret i32 [[TMP1]] ; entry: @@ -3417,6 +3413,7 @@ ; IS________NPM-NEXT: br label [[IF_END]] ; IS________NPM: if.end: ; IS________NPM-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +; IS________NPM-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP1]] to i8* ; IS________NPM-NEXT: ret i32 [[TMP2]] ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@conditional_calloc @@ -3473,10 +3470,12 @@ ; IS________NPM-NEXT: entry: ; IS________NPM-NEXT: [[TMP0:%.*]] = alloca i8, i64 4, align 1 ; IS________NPM-NEXT: call void @llvm.memset.p0i8.i64(i8* [[TMP0]], i8 0, i64 4, i1 false) +; IS________NPM-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to i32* ; IS________NPM-NEXT: br i1 [[C]], label [[IF_END:%.*]], label [[IF_THEN:%.*]] ; IS________NPM: if.then: ; IS________NPM-NEXT: br label [[IF_END]] ; IS________NPM: if.end: +; IS________NPM-NEXT: [[TMP2:%.*]] = bitcast i32* [[TMP1]] to i8* ; IS________NPM-NEXT: ret i32 0 ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@conditional_calloc_zero diff --git 
a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll --- a/llvm/test/Transforms/Attributor/value-simplify.ll +++ b/llvm/test/Transforms/Attributor/value-simplify.ll @@ -903,7 +903,9 @@ ; ; IS__CGSCC____-LABEL: define {{[^@]+}}@test_callee_is_undef ; IS__CGSCC____-SAME: (void (i32)* nocapture nofree [[FN:%.*]]) { -; IS__CGSCC____-NEXT: unreachable +; IS__CGSCC____-NEXT: call void @callee_is_undef() +; IS__CGSCC____-NEXT: call void @unknown_calle_arg_is_undef(void (i32)* nocapture nofree noundef nonnull [[FN]]) +; IS__CGSCC____-NEXT: ret void ; call void @callee_is_undef(void ()* undef) call void @unknown_calle_arg_is_undef(void (i32)* %fn, i32 undef) @@ -911,14 +913,9 @@ } define internal void @callee_is_undef(void ()* %fn) { ; -; IS__TUNIT____-LABEL: define {{[^@]+}}@callee_is_undef() { -; IS__TUNIT____-NEXT: call void undef() -; IS__TUNIT____-NEXT: ret void -; -; IS__CGSCC____-LABEL: define {{[^@]+}}@callee_is_undef -; IS__CGSCC____-SAME: (void ()* nocapture nofree noundef nonnull [[FN:%.*]]) { -; IS__CGSCC____-NEXT: call void [[FN]]() -; IS__CGSCC____-NEXT: ret void +; CHECK-LABEL: define {{[^@]+}}@callee_is_undef() { +; CHECK-NEXT: call void undef() +; CHECK-NEXT: ret void ; call void %fn() ret void diff --git a/llvm/test/Transforms/ConstraintElimination/add-nuw.ll b/llvm/test/Transforms/ConstraintElimination/add-nuw.ll --- a/llvm/test/Transforms/ConstraintElimination/add-nuw.ll +++ b/llvm/test/Transforms/ConstraintElimination/add-nuw.ll @@ -454,3 +454,127 @@ } declare void @use(i1) + +define i1 @add_nuw_neg_pr54224_i16(i16 %a) { +; CHECK-LABEL: @add_nuw_neg_pr54224_i16( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NEG2:%.*]] = add nuw i16 [[A:%.*]], -305 +; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i16 0, [[NEG2]] +; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_1:%.*]], label [[EXIT_2:%.*]] +; CHECK: exit.1: +; CHECK-NEXT: [[C_2:%.*]] = icmp ugt i16 [[A]], 0 +; CHECK-NEXT: ret i1 false +; CHECK: exit.2: +; CHECK-NEXT: 
[[C_3:%.*]] = icmp ugt i16 [[A]], 0 +; CHECK-NEXT: ret i1 [[C_3]] +; +entry: + %neg2 = add nuw i16 %a, -305 + %c.1 = icmp ugt i16 0, %neg2 + br i1 %c.1, label %exit.1, label %exit.2 + +exit.1: + %c.2 = icmp ugt i16 %a, 0 + ret i1 %c.2 + +exit.2: + %c.3 = icmp ugt i16 %a, 0 + ret i1 %c.3 +} + +define i1 @add_nuw_neg_pr54224_i64(i64 %a) { +; CHECK-LABEL: @add_nuw_neg_pr54224_i64( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NEG2:%.*]] = add nuw i64 [[A:%.*]], -305 +; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i64 0, [[NEG2]] +; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_1:%.*]], label [[EXIT_2:%.*]] +; CHECK: exit.1: +; CHECK-NEXT: [[C_2:%.*]] = icmp ugt i64 [[A]], 0 +; CHECK-NEXT: ret i1 [[C_2]] +; CHECK: exit.2: +; CHECK-NEXT: [[C_3:%.*]] = icmp ugt i64 [[A]], 0 +; CHECK-NEXT: ret i1 [[C_3]] +; +entry: + %neg2 = add nuw i64 %a, -305 + %c.1 = icmp ugt i64 0, %neg2 + br i1 %c.1, label %exit.1, label %exit.2 + +exit.1: + %c.2 = icmp ugt i64 %a, 0 + ret i1 %c.2 + +exit.2: + %c.3 = icmp ugt i64 %a, 0 + ret i1 %c.3 +} + +define i1 @add_nuw_neg2_i8(i8 %a) { +; CHECK-LABEL: @add_nuw_neg2_i8( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NEG2:%.*]] = add nuw i8 [[A:%.*]], -4 +; CHECK-NEXT: [[C_1:%.*]] = icmp ult i8 [[NEG2]], -2 +; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_1:%.*]], label [[EXIT_2:%.*]] +; CHECK: exit.1: +; CHECK-NEXT: [[T_1:%.*]] = icmp ult i8 [[A]], 2 +; CHECK-NEXT: [[C_2:%.*]] = icmp ult i8 [[A]], 1 +; CHECK-NEXT: [[RES_1:%.*]] = xor i1 true, [[C_2]] +; CHECK-NEXT: ret i1 [[RES_1]] +; CHECK: exit.2: +; CHECK-NEXT: [[C_3:%.*]] = icmp ult i8 [[A]], 3 +; CHECK-NEXT: [[F_1:%.*]] = icmp ult i8 [[A]], 2 +; CHECK-NEXT: [[RES_2:%.*]] = xor i1 [[C_3]], false +; CHECK-NEXT: ret i1 [[RES_2]] +; +entry: + %neg2 = add nuw i8 %a, -4 + %c.1 = icmp ult i8 %neg2, -2 + br i1 %c.1, label %exit.1, label %exit.2 + +exit.1: + %t.1 = icmp ult i8 %a, 2 + %c.2 = icmp ult i8 %a, 1 + %res.1 = xor i1 %t.1, %c.2 + ret i1 %res.1 + +exit.2: + %c.3 = icmp ult i8 %a, 3 + %f.1 = icmp ult i8 %a, 2 + %res.2 = xor i1 
%c.3, %f.1 + ret i1 %res.2 +} + +define i1 @add_nuw_neg2_i64(i64 %a) { +; CHECK-LABEL: @add_nuw_neg2_i64( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NEG2:%.*]] = add nuw i64 [[A:%.*]], -4 +; CHECK-NEXT: [[C_1:%.*]] = icmp ult i64 [[NEG2]], -2 +; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_1:%.*]], label [[EXIT_2:%.*]] +; CHECK: exit.1: +; CHECK-NEXT: [[T_1:%.*]] = icmp ult i64 [[A]], 2 +; CHECK-NEXT: [[C_2:%.*]] = icmp ult i64 [[A]], 1 +; CHECK-NEXT: [[RES_1:%.*]] = xor i1 [[T_1]], [[C_2]] +; CHECK-NEXT: ret i1 [[RES_1]] +; CHECK: exit.2: +; CHECK-NEXT: [[C_3:%.*]] = icmp ult i64 [[A]], 3 +; CHECK-NEXT: [[F_1:%.*]] = icmp ult i64 [[A]], 2 +; CHECK-NEXT: [[RES_2:%.*]] = xor i1 [[C_3]], [[F_1]] +; CHECK-NEXT: ret i1 [[RES_2]] +; +entry: + %neg2 = add nuw i64 %a, -4 + %c.1 = icmp ult i64 %neg2, -2 + br i1 %c.1, label %exit.1, label %exit.2 + +exit.1: + %t.1 = icmp ult i64 %a, 2 + %c.2 = icmp ult i64 %a, 1 + %res.1 = xor i1 %t.1, %c.2 + ret i1 %res.1 + +exit.2: + %c.3 = icmp ult i64 %a, 3 + %f.1 = icmp ult i64 %a, 2 + %res.2 = xor i1 %c.3, %f.1 + ret i1 %res.2 +} diff --git a/llvm/test/Transforms/ConstraintElimination/assumes.ll b/llvm/test/Transforms/ConstraintElimination/assumes.ll --- a/llvm/test/Transforms/ConstraintElimination/assumes.ll +++ b/llvm/test/Transforms/ConstraintElimination/assumes.ll @@ -152,6 +152,34 @@ ret i1 %res.4 } +; Test case from PR54217. 
+define i1 @assume_does_not_dominates_successor_with_may_unwind_call_before_assume(i16 %a, i1 %i.0) { +; CHECK-LABEL: @assume_does_not_dominates_successor_with_may_unwind_call_before_assume( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[I_0:%.*]], label [[EXIT:%.*]], label [[IF_THEN:%.*]] +; CHECK: if.then: +; CHECK-NEXT: call void @may_unwind() +; CHECK-NEXT: [[C_1:%.*]] = icmp eq i16 [[A:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[C_1]]) +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: [[C_2:%.*]] = icmp eq i16 [[A]], 0 +; CHECK-NEXT: ret i1 [[C_2]] +; +entry: + br i1 %i.0, label %exit, label %if.then + +if.then: + call void @may_unwind() + %c.1 = icmp eq i16 %a, 0 + call void @llvm.assume(i1 %c.1) + br label %exit + +exit: + %c.2 = icmp eq i16 %a, 0 + ret i1 %c.2 +} + define i1 @assume_single_bb(i8 %a, i8 %b, i1 %c) { ; CHECK-LABEL: @assume_single_bb( ; CHECK-NEXT: [[ADD_1:%.*]] = add nuw nsw i8 [[A:%.*]], 1 @@ -489,3 +517,4 @@ %res.2 = xor i1 %res.1, %c.2 ret i1 %res.2 } + diff --git a/llvm/test/Transforms/ConstraintElimination/pr54228-variable-name-order.ll b/llvm/test/Transforms/ConstraintElimination/pr54228-variable-name-order.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/ConstraintElimination/pr54228-variable-name-order.ll @@ -0,0 +1,55 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=constraint-elimination -S %s | FileCheck %s + + +declare void @fn() + +define i1 @test_pr54228(i32 %a, i32 %b, i1 %i.0, i1 %i.1) { +; CHECK-LABEL: @test_pr54228( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 [[I_0:%.*]], label [[PH_1:%.*]], label [[LOOP_HEADER:%.*]] +; CHECK: ph.1: +; CHECK-NEXT: [[C_1:%.*]] = icmp eq i32 [[A:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[C_1]]) +; CHECK-NEXT: br label [[LOOP_HEADER]] +; CHECK: loop.header: +; CHECK-NEXT: br i1 [[I_1:%.*]], label [[LOOP_THEN:%.*]], label [[LOOP_LATCH:%.*]] +; CHECK: loop.then: +; CHECK-NEXT: call void @fn() +; 
CHECK-NEXT: [[C_2:%.*]] = icmp eq i32 [[B:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[C_2]]) +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[C_3:%.*]] = icmp eq i32 [[B]], 1 +; CHECK-NEXT: br i1 [[C_3]], label [[EXIT:%.*]], label [[LOOP_HEADER]] +; CHECK: exit: +; CHECK-NEXT: [[C_4:%.*]] = icmp eq i32 [[A]], 0 +; CHECK-NEXT: ret i1 [[C_4]] +; +entry: + br i1 %i.0, label %ph.1, label %loop.header + +ph.1: ; preds = %entry + %c.1 = icmp eq i32 %a, 0 + call void @llvm.assume(i1 %c.1) + br label %loop.header + +loop.header: ; preds = %loop.latch, %ph.1, %entry + br i1 %i.1, label %loop.then, label %loop.latch + +loop.then: ; preds = %loop.header + call void @fn() + %c.2 = icmp eq i32 %b, 0 + call void @llvm.assume(i1 %c.2) + br label %loop.latch + +loop.latch: ; preds = %loop.then, %loop.header + %c.3 = icmp eq i32 %b, 1 + br i1 %c.3, label %exit, label %loop.header + +exit: ; preds = %loop.latch + %c.4 = icmp eq i32 %a, 0 + ret i1 %c.4 +} + +declare void @llvm.assume(i1 noundef) diff --git a/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll b/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll --- a/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll +++ b/llvm/test/Transforms/ConstraintElimination/sub-nuw.ll @@ -272,3 +272,84 @@ } declare void @use(i1) + +define i1 @sub_nuw_i16_simp(i16 %a) { +; CHECK-LABEL: @sub_nuw_i16_simp( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NEG2:%.*]] = sub nuw i16 [[A:%.*]], 305 +; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i16 0, [[NEG2]] +; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_1:%.*]], label [[EXIT_2:%.*]] +; CHECK: exit.1: +; CHECK-NEXT: [[C_2:%.*]] = icmp ugt i16 [[A]], 0 +; CHECK-NEXT: ret i1 [[C_2]] +; CHECK: exit.2: +; CHECK-NEXT: [[C_3:%.*]] = icmp ugt i16 [[A]], 0 +; CHECK-NEXT: ret i1 true +; +entry: + %neg2 = sub nuw i16 %a, 305 + %c.1 = icmp ugt i16 0, %neg2 + br i1 %c.1, label %exit.1, label %exit.2 + +exit.1: + %c.2 = icmp ugt i16 %a, 0 + ret i1 %c.2 + +exit.2: + %c.3 = icmp ugt i16 %a, 
0 + ret i1 %c.3 +} + +define i1 @sub_nuw_i64_simp(i64 %a) { +; CHECK-LABEL: @sub_nuw_i64_simp( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NEG2:%.*]] = sub nuw i64 [[A:%.*]], 305 +; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i64 0, [[NEG2]] +; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_1:%.*]], label [[EXIT_2:%.*]] +; CHECK: exit.1: +; CHECK-NEXT: [[C_2:%.*]] = icmp ugt i64 [[A]], 0 +; CHECK-NEXT: ret i1 [[C_2]] +; CHECK: exit.2: +; CHECK-NEXT: [[C_3:%.*]] = icmp ugt i64 [[A]], 0 +; CHECK-NEXT: ret i1 true +; +entry: + %neg2 = sub nuw i64 %a, 305 + %c.1 = icmp ugt i64 0, %neg2 + br i1 %c.1, label %exit.1, label %exit.2 + +exit.1: + %c.2 = icmp ugt i64 %a, 0 + ret i1 %c.2 + +exit.2: + %c.3 = icmp ugt i64 %a, 0 + ret i1 %c.3 +} + +define i1 @sub_nuw_neg_i16(i16 %a) { +; CHECK-LABEL: @sub_nuw_neg_i16( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NEG2:%.*]] = sub nuw i16 [[A:%.*]], -305 +; CHECK-NEXT: [[C_1:%.*]] = icmp ugt i16 0, [[NEG2]] +; CHECK-NEXT: br i1 [[C_1]], label [[EXIT_1:%.*]], label [[EXIT_2:%.*]] +; CHECK: exit.1: +; CHECK-NEXT: [[C_2:%.*]] = icmp ugt i16 [[A]], 0 +; CHECK-NEXT: ret i1 false +; CHECK: exit.2: +; CHECK-NEXT: [[C_3:%.*]] = icmp ugt i16 [[A]], 0 +; CHECK-NEXT: ret i1 [[C_3]] +; +entry: + %neg2 = sub nuw i16 %a, -305 + %c.1 = icmp ugt i16 0, %neg2 + br i1 %c.1, label %exit.1, label %exit.2 + +exit.1: + %c.2 = icmp ugt i16 %a, 0 + ret i1 %c.2 + +exit.2: + %c.3 = icmp ugt i16 %a, 0 + ret i1 %c.3 +} diff --git a/llvm/test/Transforms/Coroutines/coro-elide-musttail.ll b/llvm/test/Transforms/Coroutines/coro-elide-musttail.ll --- a/llvm/test/Transforms/Coroutines/coro-elide-musttail.ll +++ b/llvm/test/Transforms/Coroutines/coro-elide-musttail.ll @@ -13,7 +13,7 @@ @"bar.resumers" = private constant [3 x void (%"bar.Frame"*)*] [void (%"bar.Frame"*)* @"bar.resume", void (%"bar.Frame"*)* undef, void (%"bar.Frame"*)* undef] declare dso_local void @"bar"() align 2 -declare dso_local fastcc void @"bar.resume"(%"bar.Frame"*) align 2 +declare dso_local fastcc void 
@"bar.resume"(%"bar.Frame"* align 8 dereferenceable(24)) align 2 ; There is a musttail call. ; With alias analysis, we can tell that the frame does not interfere with CALL34, and hence we can keep the tailcalls. diff --git a/llvm/test/Transforms/Coroutines/coro-elide-stat.ll b/llvm/test/Transforms/Coroutines/coro-elide-stat.ll --- a/llvm/test/Transforms/Coroutines/coro-elide-stat.ll +++ b/llvm/test/Transforms/Coroutines/coro-elide-stat.ll @@ -17,7 +17,7 @@ declare void @print(i32) nounwind ; resume part of the coroutine -define fastcc void @f.resume(i8*) { +define fastcc void @f.resume(i8* dereferenceable(1)) { tail call void @print(i32 0) ret void } diff --git a/llvm/test/Transforms/Coroutines/coro-elide.ll b/llvm/test/Transforms/Coroutines/coro-elide.ll --- a/llvm/test/Transforms/Coroutines/coro-elide.ll +++ b/llvm/test/Transforms/Coroutines/coro-elide.ll @@ -7,7 +7,7 @@ declare void @print(i32) nounwind ; resume part of the coroutine -define fastcc void @f.resume(i8*) { +define fastcc void @f.resume(i8* dereferenceable(1)) { tail call void @print(i32 0) ret void } diff --git a/llvm/test/Transforms/Coroutines/coro-heap-elide.ll b/llvm/test/Transforms/Coroutines/coro-heap-elide.ll --- a/llvm/test/Transforms/Coroutines/coro-heap-elide.ll +++ b/llvm/test/Transforms/Coroutines/coro-heap-elide.ll @@ -11,7 +11,7 @@ declare void @bar(i8*) -declare fastcc void @f.resume(%f.frame*) +declare fastcc void @f.resume(%f.frame* align 4 dereferenceable(4)) declare fastcc void @f.destroy(%f.frame*) declare fastcc void @f.cleanup(%f.frame*) diff --git a/llvm/test/Transforms/Coroutines/coro-retcon-opaque-ptr.ll b/llvm/test/Transforms/Coroutines/coro-retcon-opaque-ptr.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/Coroutines/coro-retcon-opaque-ptr.ll @@ -0,0 +1,98 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -enable-coroutines -passes='default' -opaque-pointers -S | FileCheck %s + +; Same test as coro-retcon.ll, but 
with opaque pointers enabled. + +define ptr @f(ptr %buffer, i32 %n) { +; CHECK-LABEL: @f( +; CHECK-NEXT: coro.return: +; CHECK-NEXT: store i32 [[N:%.*]], ptr [[BUFFER:%.*]], align 4 +; CHECK-NEXT: tail call void @print(i32 [[N]]) +; CHECK-NEXT: ret ptr @f.resume.0 +; +entry: + %id = call token @llvm.coro.id.retcon(i32 8, i32 4, ptr %buffer, ptr @prototype, ptr @allocate, ptr @deallocate) + %hdl = call ptr @llvm.coro.begin(token %id, ptr null) + br label %loop + +loop: ; preds = %resume, %entry + %n.val = phi i32 [ %n, %entry ], [ %inc, %resume ] + call void @print(i32 %n.val) + %unwind0 = call i1 (...) @llvm.coro.suspend.retcon.i1() + br i1 %unwind0, label %cleanup, label %resume + +resume: ; preds = %loop + %inc = add i32 %n.val, 1 + br label %loop + +cleanup: ; preds = %loop + %0 = call i1 @llvm.coro.end(ptr %hdl, i1 false) + unreachable +} + +define i32 @main() { +; CHECK-LABEL: @main( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = alloca [8 x i8], align 4 +; CHECK-NEXT: store i32 4, ptr [[TMP0]], align 4 +; CHECK-NEXT: call void @print(i32 4) +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META0:![0-9]+]]) +; CHECK-NEXT: [[N_VAL_RELOAD_I:%.*]] = load i32, ptr [[TMP0]], align 4, !alias.scope !0 +; CHECK-NEXT: [[INC_I:%.*]] = add i32 [[N_VAL_RELOAD_I]], 1 +; CHECK-NEXT: store i32 [[INC_I]], ptr [[TMP0]], align 4, !alias.scope !0 +; CHECK-NEXT: call void @print(i32 [[INC_I]]), !noalias !0 +; CHECK-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +; CHECK-NEXT: [[N_VAL_RELOAD_I1:%.*]] = load i32, ptr [[TMP0]], align 4, !alias.scope !3 +; CHECK-NEXT: [[INC_I2:%.*]] = add i32 [[N_VAL_RELOAD_I1]], 1 +; CHECK-NEXT: call void @print(i32 [[INC_I2]]), !noalias !3 +; CHECK-NEXT: ret i32 0 +; +entry: + %0 = alloca [8 x i8], align 4 + %prepare = call ptr @llvm.coro.prepare.retcon(ptr @f) + %cont0 = call ptr %prepare(ptr %0, i32 4) + %cont1 = call ptr %cont0(ptr %0, i1 zeroext false) + %cont2 = call ptr 
%cont1(ptr %0, i1 zeroext false) + %1 = call ptr %cont2(ptr %0, i1 zeroext true) + ret i32 0 +} + +define hidden { ptr, ptr } @g(ptr %buffer, ptr %ptr) { +; CHECK-LABEL: @g( +; CHECK-NEXT: coro.return: +; CHECK-NEXT: [[TMP0:%.*]] = tail call ptr @allocate(i32 8) +; CHECK-NEXT: store ptr [[TMP0]], ptr [[BUFFER:%.*]], align 8 +; CHECK-NEXT: store ptr [[PTR:%.*]], ptr [[TMP0]], align 8 +; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr, ptr } { ptr @g.resume.0, ptr undef }, ptr [[PTR]], 1 +; CHECK-NEXT: ret { ptr, ptr } [[TMP1]] +; +entry: + %id = call token @llvm.coro.id.retcon(i32 8, i32 4, ptr %buffer, ptr @g_prototype, ptr @allocate, ptr @deallocate) + %hdl = call ptr @llvm.coro.begin(token %id, ptr null) + br label %loop + +loop: ; preds = %resume, %entry + %unwind0 = call i1 (...) @llvm.coro.suspend.retcon.i1(ptr %ptr) + br i1 %unwind0, label %cleanup, label %resume + +resume: ; preds = %loop + br label %loop + +cleanup: ; preds = %loop + %0 = call i1 @llvm.coro.end(ptr %hdl, i1 false) + unreachable +} + +declare token @llvm.coro.id.retcon(i32, i32, i8*, i8*, i8*, i8*) +declare i8* @llvm.coro.begin(token, i8*) +declare i1 @llvm.coro.suspend.retcon.i1(...) 
+declare i1 @llvm.coro.end(i8*, i1) +declare i8* @llvm.coro.prepare.retcon(i8*) + +declare i8* @prototype(i8*, i1 zeroext) +declare {i8*,i8*} @g_prototype(i8*, i1 zeroext) + +declare noalias i8* @allocate(i32 %size) +declare void @deallocate(i8* %ptr) + +declare void @print(i32) diff --git a/llvm/test/Transforms/IndVarSimplify/X86/deterministic-scev-verify.ll b/llvm/test/Transforms/IndVarSimplify/X86/deterministic-scev-verify.ll --- a/llvm/test/Transforms/IndVarSimplify/X86/deterministic-scev-verify.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/deterministic-scev-verify.ll @@ -1,18 +1,13 @@ -; RUN: opt -indvars -stats -disable-output < %s 2>&1 | FileCheck %s --check-prefix=STATS -; RUN: opt -indvars -S < %s | FileCheck %s --check-prefix=IR -; REQUIRES: asserts +; RUN: opt -indvars -S < %s | FileCheck %s ; Check that IndVarSimplify's result is not influenced by stray calls to ; ScalarEvolution in debug builds. However, -verify-indvars may still do ; such calls. ; llvm.org/PR44815 -; STATS: 1 scalar-evolution - Number of loops with trip counts computed by force -; STATS: 2 scalar-evolution - Number of loops with predictable loop counts - ; In this test, adding -verify-indvars causes %tmp13 to not be optimized away. 
-; IR-LABEL: @foo -; IR-NOT: phi i32 +; CHECK-LABEL: @foo +; CHECK-NOT: phi i32 target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/IndVarSimplify/X86/pr35406.ll b/llvm/test/Transforms/IndVarSimplify/X86/pr35406.ll --- a/llvm/test/Transforms/IndVarSimplify/X86/pr35406.ll +++ b/llvm/test/Transforms/IndVarSimplify/X86/pr35406.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -passes='loop(indvars),verify' %s | FileCheck %s +; RUN: opt -S -indvars -verify-scev %s | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/LICM/loop-sink-phi-in-preheader.ll b/llvm/test/Transforms/LICM/loop-sink-phi-in-preheader.ll new file mode 100644 --- /dev/null +++ b/llvm/test/Transforms/LICM/loop-sink-phi-in-preheader.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -passes=loop-sink %s | FileCheck %s + +define void @preheader_of_inner_has_phi() !prof !0 { +; CHECK-LABEL: @preheader_of_inner_has_phi( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[OUTER_HEADER:%.*]] +; CHECK: outer.header: +; CHECK-NEXT: [[PH_1:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[PH_2:%.*]], [[OUTER_LATCH:%.*]] ] +; CHECK-NEXT: br label [[INNER:%.*]] +; CHECK: inner: +; CHECK-NEXT: [[PH_2]] = phi i32 [ 0, [[OUTER_HEADER]] ], [ 0, [[INNER]] ] +; CHECK-NEXT: br i1 false, label [[OUTER_LATCH]], label [[INNER]], !prof [[PROF1:![0-9]+]] +; CHECK: outer.latch: +; CHECK-NEXT: br label [[OUTER_HEADER]] +; +entry: + br label %outer.header + +outer.header: + %ph.1 = phi i32 [ 0, %entry ], [ %ph.2, %outer.latch ] + br label %inner + +inner: + %ph.2 = phi i32 [ 0, %outer.header ], [ 0, %inner ] + br i1 false, label %outer.latch, label %inner, !prof !1 + +outer.latch: + br label %outer.header +} + +!0 = !{!"function_entry_count", i64 549102} +!1 = !{!"branch_weights", i32 4027913, i32 
1} diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll @@ -12,12 +12,12 @@ entry: br label %for.body -; VF_8-LABEL: Checking a loop in "i8_factor_2" +; VF_8-LABEL: Checking a loop in 'i8_factor_2' ; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i8, i8* %tmp0, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, i8* %tmp1, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1 ; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1 -; VF_16-LABEL: Checking a loop in "i8_factor_2" +; VF_16-LABEL: Checking a loop in 'i8_factor_2' ; VF_16: Found an estimated cost of 2 for VF 16 For instruction: %tmp2 = load i8, i8* %tmp0, align 1 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, i8* %tmp1, align 1 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1 @@ -43,17 +43,17 @@ entry: br label %for.body -; VF_4-LABEL: Checking a loop in "i16_factor_2" +; VF_4-LABEL: Checking a loop in 'i16_factor_2' ; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i16, i16* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, i16* %tmp1, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2 -; VF_8-LABEL: Checking a loop in "i16_factor_2" +; VF_8-LABEL: Checking a loop in 'i16_factor_2' ; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i16, i16* %tmp0, align 2 ; VF_8-NEXT: 
Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, i16* %tmp1, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2 -; VF_16-LABEL: Checking a loop in "i16_factor_2" +; VF_16-LABEL: Checking a loop in 'i16_factor_2' ; VF_16: Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i16, i16* %tmp0, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, i16* %tmp1, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp0, align 2 @@ -79,22 +79,22 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "i32_factor_2" +; VF_2-LABEL: Checking a loop in 'i32_factor_2' ; VF_2: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4 -; VF_4-LABEL: Checking a loop in "i32_factor_2" +; VF_4-LABEL: Checking a loop in 'i32_factor_2' ; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4 -; VF_8-LABEL: Checking a loop in "i32_factor_2" +; VF_8-LABEL: Checking a loop in 'i32_factor_2' ; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 ; VF_8-NEXT: Found an estimated 
cost of 0 for VF 8 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp0, align 4 ; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4 -; VF_16-LABEL: Checking a loop in "i32_factor_2" +; VF_16-LABEL: Checking a loop in 'i32_factor_2' ; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp0, align 4 @@ -120,22 +120,22 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "i64_factor_2" +; VF_2-LABEL: Checking a loop in 'i64_factor_2' ; VF_2: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8 -; VF_4-LABEL: Checking a loop in "i64_factor_2" +; VF_4-LABEL: Checking a loop in 'i64_factor_2' ; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store i64 0, i64* %tmp1, align 8 -; VF_8-LABEL: Checking a loop in "i64_factor_2" +; VF_8-LABEL: Checking a loop in 'i64_factor_2' ; VF_8: Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 
8 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 8 for VF 8 For instruction: store i64 0, i64* %tmp1, align 8 -; VF_16-LABEL: Checking a loop in "i64_factor_2" +; VF_16-LABEL: Checking a loop in 'i64_factor_2' ; VF_16: Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 0, i64* %tmp0, align 8 @@ -167,7 +167,7 @@ ; stores do not form a legal interleaved group because the group would contain ; gaps. ; -; VF_2-LABEL: Checking a loop in "i64_factor_8" +; VF_2-LABEL: Checking a loop in 'i64_factor_8' ; VF_2: Found an estimated cost of 10 for VF 2 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 7 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-op-cost.ll @@ -4,7 +4,7 @@ target triple = "aarch64-unknown-linux-gnu" -; CHECK-COST: Checking a loop in "fixed_width" +; CHECK-COST: Checking a loop in 'fixed_width' ; CHECK-COST: Found an estimated cost of 11 for VF 2 For instruction: store i32 2, i32* %arrayidx1, align 4 ; CHECK-COST: Found an estimated cost of 25 for VF 4 For instruction: store i32 2, i32* %arrayidx1, align 4 ; CHECK-COST: Selecting VF: 1. 
@@ -45,7 +45,7 @@ } -; CHECK-COST: Checking a loop in "scalable" +; CHECK-COST: Checking a loop in 'scalable' ; CHECK-COST: Found an estimated cost of 2 for VF vscale x 4 For instruction: store i32 2, i32* %arrayidx1, align 4 define void @scalable(i32* noalias nocapture %a, i32* noalias nocapture readonly %b, i64 %n) #0 { diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll @@ -7,7 +7,7 @@ ; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16 ; (maximized bandwidth for i8 in the loop). define void @test0(i32* %a, i8* %b, i32* %c) #0 { -; CHECK: LV: Checking a loop in "test0" +; CHECK: LV: Checking a loop in 'test0' ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4 ; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 4 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF @@ -38,7 +38,7 @@ ; Test that the MaxVF for the following loop, with a dependence distance ; of 64 elements, is calculated as (maxvscale = 16) * 4. define void @test1(i32* %a, i8* %b) #0 { -; CHECK: LV: Checking a loop in "test1" +; CHECK: LV: Checking a loop in 'test1' ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4 ; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 4 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF @@ -70,7 +70,7 @@ ; Test that the MaxVF for the following loop, with a dependence distance ; of 32 elements, is calculated as (maxvscale = 16) * 2. 
define void @test2(i32* %a, i8* %b) #0 { -; CHECK: LV: Checking a loop in "test2" +; CHECK: LV: Checking a loop in 'test2' ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2 ; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 2 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF @@ -102,7 +102,7 @@ ; Test that the MaxVF for the following loop, with a dependence distance ; of 16 elements, is calculated as (maxvscale = 16) * 1. define void @test3(i32* %a, i8* %b) #0 { -; CHECK: LV: Checking a loop in "test3" +; CHECK: LV: Checking a loop in 'test3' ; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1 ; CHECK_SCALABLE_ON: LV: Selecting VF: 4 ; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF @@ -134,7 +134,7 @@ ; Test the fallback mechanism when scalable vectors are not feasible due ; to e.g. dependence distance. define void @test4(i32* %a, i32* %b) #0 { -; CHECK: LV: Checking a loop in "test4" +; CHECK: LV: Checking a loop in 'test4' ; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF ; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF ; CHECK_SCALABLE_ON: LV: Selecting VF: 4 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll @@ -37,7 +37,7 @@ ; unless max(vscale)=2 it's unsafe to vectorize. For SVE max(vscale)=16, check ; fixed-width vectorization is used instead. -; CHECK-DBG: LV: Checking a loop in "test1" +; CHECK-DBG: LV: Checking a loop in 'test1' ; CHECK-DBG: LV: Scalable vectorization is available ; CHECK-DBG: LV: Max legal vector width too small, scalable vectorization unfeasible. ; CHECK-DBG: remark: :0:0: Max legal vector width too small, scalable vectorization unfeasible. 
@@ -82,7 +82,7 @@ ; } ; } -; CHECK-DBG: LV: Checking a loop in "test2" +; CHECK-DBG: LV: Checking a loop in 'test2' ; CHECK-DBG: LV: Scalable vectorization is available ; CHECK-DBG: LV: Max legal vector width too small, scalable vectorization unfeasible. ; CHECK-DBG: LV: The max safe fixed VF is: 4. @@ -132,7 +132,7 @@ ; ; Max fixed VF=32, Max scalable VF=2, safe to vectorize. -; CHECK-DBG-LABEL: LV: Checking a loop in "test3" +; CHECK-DBG-LABEL: LV: Checking a loop in 'test3' ; CHECK-DBG: LV: Scalable vectorization is available ; CHECK-DBG: LV: The max safe scalable VF is: vscale x 2. ; CHECK-DBG: LV: Using user VF vscale x 2. @@ -181,7 +181,7 @@ ; ; Max fixed VF=32, Max scalable VF=2, unsafe to vectorize. -; CHECK-DBG-LABEL: LV: Checking a loop in "test4" +; CHECK-DBG-LABEL: LV: Checking a loop in 'test4' ; CHECK-DBG: LV: Scalable vectorization is available ; CHECK-DBG: LV: The max safe scalable VF is: vscale x 2. ; CHECK-DBG: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF. @@ -232,7 +232,7 @@ ; ; Max fixed VF=128, Max scalable VF=8, safe to vectorize. -; CHECK-DBG-LABEL: LV: Checking a loop in "test5" +; CHECK-DBG-LABEL: LV: Checking a loop in 'test5' ; CHECK-DBG: LV: Scalable vectorization is available ; CHECK-DBG: LV: The max safe scalable VF is: vscale x 8. ; CHECK-DBG: LV: Using user VF vscale x 4 @@ -280,7 +280,7 @@ ; ; Max fixed VF=128, Max scalable VF=8, unsafe to vectorize. -; CHECK-DBG-LABEL: LV: Checking a loop in "test6" +; CHECK-DBG-LABEL: LV: Checking a loop in 'test6' ; CHECK-DBG: LV: Scalable vectorization is available ; CHECK-DBG: LV: The max safe scalable VF is: vscale x 8. ; CHECK-DBG: LV: User VF=vscale x 16 is unsafe. Ignoring scalable UserVF. 
@@ -315,7 +315,7 @@ !16 = !{!"llvm.loop.vectorize.width", i32 16} !17 = !{!"llvm.loop.vectorize.scalable.enable", i1 true} -; CHECK-NO-SVE-REMARKS-LABEL: LV: Checking a loop in "test_no_sve" +; CHECK-NO-SVE-REMARKS-LABEL: LV: Checking a loop in 'test_no_sve' ; CHECK-NO-SVE-REMARKS: LV: User VF=vscale x 4 is ignored because scalable vectors are not available. ; CHECK-NO-SVE-REMARKS: remark: :0:0: User-specified vectorization factor vscale x 4 is ignored because the target does not support scalable vectors. The compiler will pick a more suitable value. ; CHECK-NO-SVE-REMARKS: LV: Selecting VF: 4. @@ -349,7 +349,7 @@ ; Test the LV falls back to fixed-width vectorization if scalable vectors are ; supported but max vscale is undefined. ; -; CHECK-DBG-LABEL: LV: Checking a loop in "test_no_max_vscale" +; CHECK-DBG-LABEL: LV: Checking a loop in 'test_no_max_vscale' ; CHECK-DBG: LV: Scalable vectorization is available ; CHECK-DBG: The max safe fixed VF is: 4. ; CHECK-DBG: LV: User VF=vscale x 4 is unsafe. Ignoring scalable UserVF. diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll b/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/smallest-and-widest-types.ll @@ -4,7 +4,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64--linux-gnu" -; CHECK-LABEL: Checking a loop in "interleaved_access" +; CHECK-LABEL: Checking a loop in 'interleaved_access' ; CHECK: The Smallest and Widest types: 64 / 64 bits ; define void @interleaved_access(i8** %A, i64 %N) { @@ -36,7 +36,7 @@ ; determined by looking through the recurrences, which allows a sensible VF to be ; chosen. The following 3 cases check different combinations of widths. 
-; CHECK-LABEL: Checking a loop in "no_loads_stores_32" +; CHECK-LABEL: Checking a loop in 'no_loads_stores_32' ; CHECK: The Smallest and Widest types: 4294967295 / 32 bits ; CHECK: Selecting VF: 4 @@ -59,7 +59,7 @@ ret double %.lcssa } -; CHECK-LABEL: Checking a loop in "no_loads_stores_16" +; CHECK-LABEL: Checking a loop in 'no_loads_stores_16' ; CHECK: The Smallest and Widest types: 4294967295 / 16 bits ; CHECK: Selecting VF: 8 @@ -81,7 +81,7 @@ ret double %.lcssa } -; CHECK-LABEL: Checking a loop in "no_loads_stores_8" +; CHECK-LABEL: Checking a loop in 'no_loads_stores_8' ; CHECK: The Smallest and Widest types: 4294967295 / 8 bits ; CHECK: Selecting VF: 16 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-epilog-vect.ll @@ -7,11 +7,11 @@ target triple = "aarch64-linux-gnu" -; DEBUG: LV: Checking a loop in "main_vf_vscale_x_16" +; DEBUG: LV: Checking a loop in 'main_vf_vscale_x_16' ; DEBUG: Create Skeleton for epilogue vectorized loop (first pass) ; DEBUG: Main Loop VF:vscale x 16, Main Loop UF:2, Epilogue Loop VF:vscale x 8, Epilogue Loop UF:1 -; DEBUG-FORCED: LV: Checking a loop in "main_vf_vscale_x_16" +; DEBUG-FORCED: LV: Checking a loop in 'main_vf_vscale_x_16' ; DEBUG-FORCED: LEV: Epilogue vectorization factor is forced. 
; DEBUG-FORCED: Create Skeleton for epilogue vectorized loop (first pass) ; DEBUG-FORCED: Main Loop VF:vscale x 16, Main Loop UF:2, Epilogue Loop VF:8, Epilogue Loop UF:1 @@ -194,11 +194,11 @@ } -; DEBUG: LV: Checking a loop in "main_vf_vscale_x_2" +; DEBUG: LV: Checking a loop in 'main_vf_vscale_x_2' ; DEBUG: Create Skeleton for epilogue vectorized loop (first pass) ; DEBUG: Main Loop VF:vscale x 2, Main Loop UF:2, Epilogue Loop VF:8, Epilogue Loop UF:1 -; DEBUG-FORCED: LV: Checking a loop in "main_vf_vscale_x_2" +; DEBUG-FORCED: LV: Checking a loop in 'main_vf_vscale_x_2' ; DEBUG-FORCED: LEV: Epilogue vectorization factor is forced. ; DEBUG-FORCED: Create Skeleton for epilogue vectorized loop (first pass) ; DEBUG-FORCED: Main Loop VF:vscale x 2, Main Loop UF:2, Epilogue Loop VF:8, Epilogue Loop UF:1 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter-cost.ll @@ -3,7 +3,7 @@ target triple="aarch64--linux-gnu" -; CHECK: LV: Checking a loop in "gather_nxv4i32_loaded_index" +; CHECK: LV: Checking a loop in 'gather_nxv4i32_loaded_index' ; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %1 = load float, float* %arrayidx3, align 4 define void @gather_nxv4i32_loaded_index(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 { entry: @@ -25,7 +25,7 @@ ret void } -; CHECK: LV: Checking a loop in "scatter_nxv4i32_loaded_index" +; CHECK: LV: Checking a loop in 'scatter_nxv4i32_loaded_index' ; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %1, float* %arrayidx5, align 4 define void @scatter_nxv4i32_loaded_index(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias 
nocapture %c, i64 %n) #0 { entry: @@ -49,7 +49,7 @@ ; NOTE: For runtime-determined strides the vectoriser versions the loop and adds SCEV checks ; to ensure the stride value is always 1. Therefore, it can assume a contiguous load and a cost of 1. -; CHECK: LV: Checking a loop in "gather_nxv4i32_unknown_stride" +; CHECK: LV: Checking a loop in 'gather_nxv4i32_unknown_stride' ; CHECK: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %0 = load float, float* %arrayidx, align 4 define void @gather_nxv4i32_unknown_stride(float* noalias nocapture readonly %a, float* noalias nocapture %b, i64 %stride, i64 %n) #0 { entry: @@ -72,7 +72,7 @@ ; NOTE: For runtime-determined strides the vectoriser versions the loop and adds SCEV checks ; to ensure the stride value is always 1. Therefore, it can assume a contiguous load and cost is 1. -; CHECK: LV: Checking a loop in "scatter_nxv4i32_unknown_stride" +; CHECK: LV: Checking a loop in 'scatter_nxv4i32_unknown_stride' ; CHECK: LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: store float %0, float* %arrayidx2, align 4 define void @scatter_nxv4i32_unknown_stride(float* noalias nocapture readonly %a, float* noalias nocapture %b, i64 %stride, i64 %n) #0 { entry: @@ -93,7 +93,7 @@ ret void } -; CHECK: LV: Checking a loop in "gather_nxv4i32_stride2" +; CHECK: LV: Checking a loop in 'gather_nxv4i32_stride2' ; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %0 = load float, float* %arrayidx, align 4 define void @gather_nxv4i32_stride2(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 { entry: @@ -114,7 +114,7 @@ ret void } -; CHECK: LV: Checking a loop in "scatter_nxv4i32_stride2" +; CHECK: LV: Checking a loop in 'scatter_nxv4i32_stride2' ; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %0, float* %arrayidx2, align 4 define void @scatter_nxv4i32_stride2(float* noalias nocapture readonly %a, float* 
noalias nocapture readonly %b, i64 %n) #0 { entry: @@ -136,7 +136,7 @@ } -; CHECK: LV: Checking a loop in "gather_nxv4i32_stride64" +; CHECK: LV: Checking a loop in 'gather_nxv4i32_stride64' ; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: %0 = load float, float* %arrayidx, align 4 define void @gather_nxv4i32_stride64(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 { entry: @@ -157,7 +157,7 @@ ret void } -; CHECK: LV: Checking a loop in "scatter_nxv4i32_stride64" +; CHECK: LV: Checking a loop in 'scatter_nxv4i32_stride64' ; CHECK: LV: Found an estimated cost of 81 for VF vscale x 4 For instruction: store float %0, float* %arrayidx2, align 4 define void @scatter_nxv4i32_stride64(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 { entry: diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -4,7 +4,7 @@ target triple = "aarch64-unknown-linux-gnu" -; CHECK-LABEL: LV: Checking a loop in "pointer_induction_used_as_vector" +; CHECK-LABEL: LV: Checking a loop in 'pointer_induction_used_as_vector' ; CHECK-NOT: LV: Found {{.*}} scalar instruction: %ptr.iv.2.next = getelementptr inbounds i8, i8* %ptr.iv.2, i64 1 ; ; CHECK: VPlan 'Initial VPlan for VF={vscale x 2},UF>=1' { diff --git a/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll b/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll @@ -14,7 +14,7 @@ ; are allowed, even without -ffast-math. ; Integer loops are always vectorizeable -; CHECK: Checking a loop in "sumi" +; CHECK: Checking a loop in 'sumi' ; CHECK: We can vectorize this loop! 
define void @sumi(i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B, i32* noalias nocapture %C, i32 %N) { entry: @@ -45,11 +45,11 @@ } ; Floating-point loops need fast-math to be vectorizeable -; LINUX: Checking a loop in "sumf" +; LINUX: Checking a loop in 'sumf' ; LINUX: Potentially unsafe FP op prevents vectorization -; MVE: Checking a loop in "sumf" +; MVE: Checking a loop in 'sumf' ; MVE: We can vectorize this loop! -; DARWIN: Checking a loop in "sumf" +; DARWIN: Checking a loop in 'sumf' ; DARWIN: We can vectorize this loop! define void @sumf(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %N) { entry: @@ -80,7 +80,7 @@ } ; Integer loops are always vectorizeable -; CHECK: Checking a loop in "redi" +; CHECK: Checking a loop in 'redi' ; CHECK: We can vectorize this loop! define i32 @redi(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) { entry: @@ -113,11 +113,11 @@ } ; Floating-point loops need fast-math to be vectorizeable -; LINUX: Checking a loop in "redf" +; LINUX: Checking a loop in 'redf' ; LINUX: Potentially unsafe FP op prevents vectorization -; MVE: Checking a loop in "redf" +; MVE: Checking a loop in 'redf' ; MVE: We can vectorize this loop! -; DARWIN: Checking a loop in "redf" +; DARWIN: Checking a loop in 'redf' ; DARWIN: We can vectorize this loop! define float @redf(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i32 %N) { entry: @@ -150,9 +150,9 @@ } ; Make sure calls that turn into builtins are also covered -; LINUX: Checking a loop in "fabs" +; LINUX: Checking a loop in 'fabs' ; LINUX: Potentially unsafe FP op prevents vectorization -; DARWIN: Checking a loop in "fabs" +; DARWIN: Checking a loop in 'fabs' ; DARWIN: We can vectorize this loop! 
define void @fabs(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %N) { entry: @@ -178,7 +178,7 @@ } ; Integer loops are always vectorizeable -; CHECK: Checking a loop in "sumi_fast" +; CHECK: Checking a loop in 'sumi_fast' ; CHECK: We can vectorize this loop! define void @sumi_fast(i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B, i32* noalias nocapture %C, i32 %N) { entry: @@ -209,7 +209,7 @@ } ; Floating-point loops can be vectorizeable with fast-math -; CHECK: Checking a loop in "sumf_fast" +; CHECK: Checking a loop in 'sumf_fast' ; CHECK: We can vectorize this loop! define void @sumf_fast(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %N) { entry: @@ -240,7 +240,7 @@ } ; Integer loops are always vectorizeable -; CHECK: Checking a loop in "redi_fast" +; CHECK: Checking a loop in 'redi_fast' ; CHECK: We can vectorize this loop! define i32 @redi_fast(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) { entry: @@ -273,7 +273,7 @@ } ; Floating-point loops can be vectorizeable with fast-math -; CHECK: Checking a loop in "redf_fast" +; CHECK: Checking a loop in 'redf_fast' ; CHECK: We can vectorize this loop! define float @redf_fast(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i32 %N) { entry: @@ -306,7 +306,7 @@ } ; Make sure calls that turn into builtins are also covered -; CHECK: Checking a loop in "fabs_fast" +; CHECK: Checking a loop in 'fabs_fast' ; CHECK: We can vectorize this loop! 
define void @fabs_fast(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %N) { entry: diff --git a/llvm/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll @@ -12,12 +12,12 @@ entry: br label %for.body -; VF_8-LABEL: Checking a loop in "i8_factor_2" +; VF_8-LABEL: Checking a loop in 'i8_factor_2' ; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i8, i8* %tmp0, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, i8* %tmp1, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1 ; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1 -; VF_16-LABEL: Checking a loop in "i8_factor_2" +; VF_16-LABEL: Checking a loop in 'i8_factor_2' ; VF_16: Found an estimated cost of 2 for VF 16 For instruction: %tmp2 = load i8, i8* %tmp0, align 1 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, i8* %tmp1, align 1 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1 @@ -43,17 +43,17 @@ entry: br label %for.body -; VF_4-LABEL: Checking a loop in "i16_factor_2" +; VF_4-LABEL: Checking a loop in 'i16_factor_2' ; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i16, i16* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, i16* %tmp1, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2 -; VF_8-LABEL: Checking a loop in "i16_factor_2" +; VF_8-LABEL: Checking a loop in 
'i16_factor_2' ; VF_8: Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i16, i16* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, i16* %tmp1, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 2 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2 -; VF_16-LABEL: Checking a loop in "i16_factor_2" +; VF_16-LABEL: Checking a loop in 'i16_factor_2' ; VF_16: Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i16, i16* %tmp0, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, i16* %tmp1, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 0, i16* %tmp0, align 2 @@ -79,22 +79,22 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "i32_factor_2" +; VF_2-LABEL: Checking a loop in 'i32_factor_2' ; VF_2: Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 2 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4 -; VF_4-LABEL: Checking a loop in "i32_factor_2" +; VF_4-LABEL: Checking a loop in 'i32_factor_2' ; VF_4: Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 2 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4 -; VF_8-LABEL: Checking a loop in "i32_factor_2" +; VF_8-LABEL: Checking a loop in 'i32_factor_2' ; 
VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp0, align 4 ; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4 -; VF_16-LABEL: Checking a loop in "i32_factor_2" +; VF_16-LABEL: Checking a loop in 'i32_factor_2' ; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 0, i32* %tmp0, align 4 @@ -120,12 +120,12 @@ entry: br label %for.body -; VF_4-LABEL: Checking a loop in "half_factor_2" +; VF_4-LABEL: Checking a loop in 'half_factor_2' ; VF_4: Found an estimated cost of 40 for VF 4 For instruction: %tmp2 = load half, half* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, half* %tmp1, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2 -; VF_8-LABEL: Checking a loop in "half_factor_2" +; VF_8-LABEL: Checking a loop in 'half_factor_2' ; VF_8: Found an estimated cost of 80 for VF 8 For instruction: %tmp2 = load half, half* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load half, half* %tmp1, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp0, align 2 diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll --- 
a/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll @@ -14,22 +14,22 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "i8_factor_2" +; VF_2-LABEL: Checking a loop in 'i8_factor_2' ; VF_2: Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i8, i8* %tmp0, align 1 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i8, i8* %tmp1, align 1 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp0, align 1 ; VF_2-NEXT: Found an estimated cost of 8 for VF 2 For instruction: store i8 0, i8* %tmp1, align 1 -; VF_4-LABEL: Checking a loop in "i8_factor_2" +; VF_4-LABEL: Checking a loop in 'i8_factor_2' ; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i8, i8* %tmp0, align 1 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i8, i8* %tmp1, align 1 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp0, align 1 ; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store i8 0, i8* %tmp1, align 1 -; VF_8-LABEL: Checking a loop in "i8_factor_2" +; VF_8-LABEL: Checking a loop in 'i8_factor_2' ; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i8, i8* %tmp0, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, i8* %tmp1, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1 ; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1 -; VF_16-LABEL: Checking a loop in "i8_factor_2" +; VF_16-LABEL: Checking a loop in 'i8_factor_2' ; VF_16: Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i8, i8* %tmp0, align 1 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, i8* %tmp1, align 1 ; VF_16-NEXT: Found an 
estimated cost of 0 for VF 16 For instruction: store i8 0, i8* %tmp0, align 1 @@ -55,22 +55,22 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "i16_factor_2" +; VF_2-LABEL: Checking a loop in 'i16_factor_2' ; VF_2: Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i16, i16* %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i16, i16* %tmp1, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 8 for VF 2 For instruction: store i16 0, i16* %tmp1, align 2 -; VF_4-LABEL: Checking a loop in "i16_factor_2" +; VF_4-LABEL: Checking a loop in 'i16_factor_2' ; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i16, i16* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, i16* %tmp1, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2 -; VF_8-LABEL: Checking a loop in "i16_factor_2" +; VF_8-LABEL: Checking a loop in 'i16_factor_2' ; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i16, i16* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, i16* %tmp1, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2 -; VF_16-LABEL: Checking a loop in "i16_factor_2" +; VF_16-LABEL: Checking a loop in 'i16_factor_2' ; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i16, i16* %tmp0, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, i16* %tmp1, align 2 ; VF_16-NEXT: Found an estimated cost of 0 
for VF 16 For instruction: store i16 0, i16* %tmp0, align 2 @@ -96,22 +96,22 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "i32_factor_2" +; VF_2-LABEL: Checking a loop in 'i32_factor_2' ; VF_2: Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 8 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4 -; VF_4-LABEL: Checking a loop in "i32_factor_2" +; VF_4-LABEL: Checking a loop in 'i32_factor_2' ; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4 -; VF_8-LABEL: Checking a loop in "i32_factor_2" +; VF_8-LABEL: Checking a loop in 'i32_factor_2' ; VF_8: Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp0, align 4 ; VF_8-NEXT: Found an estimated cost of 8 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4 -; VF_16-LABEL: Checking a loop in "i32_factor_2" +; VF_16-LABEL: Checking a loop in 'i32_factor_2' ; VF_16: Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load i32, i32* %tmp0, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, i32* %tmp1, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For 
instruction: store i32 0, i32* %tmp0, align 4 @@ -137,22 +137,22 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "i64_factor_2" +; VF_2-LABEL: Checking a loop in 'i64_factor_2' ; VF_2: Found an estimated cost of 44 for VF 2 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8 -; VF_4-LABEL: Checking a loop in "i64_factor_2" +; VF_4-LABEL: Checking a loop in 'i64_factor_2' ; VF_4: Found an estimated cost of 88 for VF 4 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i64 0, i64* %tmp1, align 8 -; VF_8-LABEL: Checking a loop in "i64_factor_2" +; VF_8-LABEL: Checking a loop in 'i64_factor_2' ; VF_8: Found an estimated cost of 176 for VF 8 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i64 0, i64* %tmp1, align 8 -; VF_16-LABEL: Checking a loop in "i64_factor_2" +; VF_16-LABEL: Checking a loop in 'i64_factor_2' ; VF_16: Found an estimated cost of 352 for VF 16 For instruction: %tmp2 = load i64, i64* %tmp0, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i64, i64* %tmp1, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For 
instruction: store i64 0, i64* %tmp0, align 8 @@ -178,22 +178,22 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "f16_factor_2" +; VF_2-LABEL: Checking a loop in 'f16_factor_2' ; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load half, half* %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load half, half* %tmp1, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 8 for VF 2 For instruction: store half 0xH0000, half* %tmp1, align 2 -; VF_4-LABEL: Checking a loop in "f16_factor_2" +; VF_4-LABEL: Checking a loop in 'f16_factor_2' ; VF_4: Found an estimated cost of 18 for VF 4 For instruction: %tmp2 = load half, half* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, half* %tmp1, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 16 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2 -; VF_8-LABEL: Checking a loop in "f16_factor_2" +; VF_8-LABEL: Checking a loop in 'f16_factor_2' ; VF_8: Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load half, half* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load half, half* %tmp1, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 4 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2 -; VF_16-LABEL: Checking a loop in "f16_factor_2" +; VF_16-LABEL: Checking a loop in 'f16_factor_2' ; VF_16: Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load half, half* %tmp0, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load half, half* %tmp1, align 2 ; 
VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, half* %tmp0, align 2 @@ -219,22 +219,22 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "f32_factor_2" +; VF_2-LABEL: Checking a loop in 'f32_factor_2' ; VF_2: Found an estimated cost of 10 for VF 2 For instruction: %tmp2 = load float, float* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load float, float* %tmp1, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 8 for VF 2 For instruction: store float 0.000000e+00, float* %tmp1, align 4 -; VF_4-LABEL: Checking a loop in "f32_factor_2" +; VF_4-LABEL: Checking a loop in 'f32_factor_2' ; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load float, float* %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load float, float* %tmp1, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 4 for VF 4 For instruction: store float 0.000000e+00, float* %tmp1, align 4 -; VF_8-LABEL: Checking a loop in "f32_factor_2" +; VF_8-LABEL: Checking a loop in 'f32_factor_2' ; VF_8: Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load float, float* %tmp0, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load float, float* %tmp1, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp0, align 4 ; VF_8-NEXT: Found an estimated cost of 8 for VF 8 For instruction: store float 0.000000e+00, float* %tmp1, align 4 -; VF_16-LABEL: Checking a loop in "f32_factor_2" +; VF_16-LABEL: Checking a loop in 'f32_factor_2' ; VF_16: Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load float, float* %tmp0, align 
4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load float, float* %tmp1, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, float* %tmp0, align 4 @@ -260,22 +260,22 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "f64_factor_2" +; VF_2-LABEL: Checking a loop in 'f64_factor_2' ; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load double, double* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load double, double* %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 8 for VF 2 For instruction: store double 0.000000e+00, double* %tmp1, align 8 -; VF_4-LABEL: Checking a loop in "f64_factor_2" +; VF_4-LABEL: Checking a loop in 'f64_factor_2' ; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp2 = load double, double* %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load double, double* %tmp1, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 16 for VF 4 For instruction: store double 0.000000e+00, double* %tmp1, align 8 -; VF_8-LABEL: Checking a loop in "f64_factor_2" +; VF_8-LABEL: Checking a loop in 'f64_factor_2' ; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp2 = load double, double* %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load double, double* %tmp1, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 32 for VF 8 For instruction: store double 0.000000e+00, double* %tmp1, align 8 -; VF_16-LABEL: Checking a loop in "f64_factor_2" +; 
VF_16-LABEL: Checking a loop in 'f64_factor_2' ; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp2 = load double, double* %tmp0, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load double, double* %tmp1, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, double* %tmp0, align 8 @@ -305,28 +305,28 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "i8_factor_3" +; VF_2-LABEL: Checking a loop in 'i8_factor_3' ; VF_2: Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i8, i8* %tmp0, align 1 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i8, i8* %tmp1, align 1 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i8, i8* %tmp2, align 1 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp0, align 1 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp1, align 1 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 0, i8* %tmp2, align 1 -; VF_4-LABEL: Checking a loop in "i8_factor_3" +; VF_4-LABEL: Checking a loop in 'i8_factor_3' ; VF_4: Found an estimated cost of 72 for VF 4 For instruction: %tmp3 = load i8, i8* %tmp0, align 1 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i8, i8* %tmp1, align 1 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i8, i8* %tmp2, align 1 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp0, align 1 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp1, align 1 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i8 0, i8* %tmp2, align 1 -; VF_8-LABEL: Checking a loop in "i8_factor_3" +; VF_8-LABEL: Checking a loop in 'i8_factor_3' ; VF_8: Found an estimated cost of 144 for VF 8 For 
instruction: %tmp3 = load i8, i8* %tmp0, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i8, i8* %tmp1, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i8, i8* %tmp2, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp0, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i8 0, i8* %tmp2, align 1 -; VF_16-LABEL: Checking a loop in "i8_factor_3" +; VF_16-LABEL: Checking a loop in 'i8_factor_3' ; VF_16: Found an estimated cost of 288 for VF 16 For instruction: %tmp3 = load i8, i8* %tmp0, align 1 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i8, i8* %tmp1, align 1 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i8, i8* %tmp2, align 1 @@ -357,28 +357,28 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "i16_factor_3" +; VF_2-LABEL: Checking a loop in 'i16_factor_3' ; VF_2: Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i16, i16* %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i16, i16* %tmp1, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i16, i16* %tmp2, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp1, align 2 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 0, i16* %tmp2, align 2 -; VF_4-LABEL: Checking a loop in "i16_factor_3" +; VF_4-LABEL: Checking a loop in 'i16_factor_3' ; VF_4: Found an estimated cost of 72 for VF 4 For instruction: %tmp3 = load i16, i16* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: 
%tmp4 = load i16, i16* %tmp1, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i16, i16* %tmp2, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i16 0, i16* %tmp2, align 2 -; VF_8-LABEL: Checking a loop in "i16_factor_3" +; VF_8-LABEL: Checking a loop in 'i16_factor_3' ; VF_8: Found an estimated cost of 144 for VF 8 For instruction: %tmp3 = load i16, i16* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i16, i16* %tmp1, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i16, i16* %tmp2, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i16 0, i16* %tmp2, align 2 -; VF_16-LABEL: Checking a loop in "i16_factor_3" +; VF_16-LABEL: Checking a loop in 'i16_factor_3' ; VF_16: Found an estimated cost of 288 for VF 16 For instruction: %tmp3 = load i16, i16* %tmp0, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i16, i16* %tmp1, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i16, i16* %tmp2, align 2 @@ -409,28 +409,28 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "i32_factor_3" +; VF_2-LABEL: Checking a loop in 'i32_factor_3' ; VF_2: Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i32, i32* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i32, i32* %tmp1, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For 
instruction: %tmp5 = load i32, i32* %tmp2, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 0, i32* %tmp2, align 4 -; VF_4-LABEL: Checking a loop in "i32_factor_3" +; VF_4-LABEL: Checking a loop in 'i32_factor_3' ; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load i32, i32* %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i32, i32* %tmp1, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i32, i32* %tmp2, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i32 0, i32* %tmp2, align 4 -; VF_8-LABEL: Checking a loop in "i32_factor_3" +; VF_8-LABEL: Checking a loop in 'i32_factor_3' ; VF_8: Found an estimated cost of 144 for VF 8 For instruction: %tmp3 = load i32, i32* %tmp0, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i32, i32* %tmp1, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i32, i32* %tmp2, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp0, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i32 0, i32* %tmp2, align 4 -; VF_16-LABEL: Checking a loop in "i32_factor_3" +; VF_16-LABEL: Checking a loop in 'i32_factor_3' ; VF_16: Found an estimated cost of 288 for VF 16 For instruction: %tmp3 = load i32, i32* %tmp0, align 4 ; 
VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i32, i32* %tmp1, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i32, i32* %tmp2, align 4 @@ -461,28 +461,28 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "i64_factor_3" +; VF_2-LABEL: Checking a loop in 'i64_factor_3' ; VF_2: Found an estimated cost of 66 for VF 2 For instruction: %tmp3 = load i64, i64* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i64, i64* %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i64, i64* %tmp2, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 18 for VF 2 For instruction: store i64 0, i64* %tmp2, align 8 -; VF_4-LABEL: Checking a loop in "i64_factor_3" +; VF_4-LABEL: Checking a loop in 'i64_factor_3' ; VF_4: Found an estimated cost of 132 for VF 4 For instruction: %tmp3 = load i64, i64* %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i64, i64* %tmp1, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i64, i64* %tmp2, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp1, align 8 ; VF_4-NEXT: Found an estimated cost of 36 for VF 4 For instruction: store i64 0, i64* %tmp2, align 8 -; VF_8-LABEL: Checking a loop in "i64_factor_3" +; VF_8-LABEL: Checking a loop in 'i64_factor_3' ; VF_8: Found an estimated cost of 264 for VF 8 For instruction: %tmp3 = load i64, i64* %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i64, i64* %tmp1, align 
8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i64, i64* %tmp2, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp1, align 8 ; VF_8-NEXT: Found an estimated cost of 72 for VF 8 For instruction: store i64 0, i64* %tmp2, align 8 -; VF_16-LABEL: Checking a loop in "i64_factor_3" +; VF_16-LABEL: Checking a loop in 'i64_factor_3' ; VF_16: Found an estimated cost of 528 for VF 16 For instruction: %tmp3 = load i64, i64* %tmp0, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i64, i64* %tmp1, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i64, i64* %tmp2, align 8 @@ -513,28 +513,28 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "f16_factor_3" +; VF_2-LABEL: Checking a loop in 'f16_factor_3' ; VF_2: Found an estimated cost of 18 for VF 2 For instruction: %tmp3 = load half, half* %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load half, half* %tmp1, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load half, half* %tmp2, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp1, align 2 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store half 0xH0000, half* %tmp2, align 2 -; VF_4-LABEL: Checking a loop in "f16_factor_3" +; VF_4-LABEL: Checking a loop in 'f16_factor_3' ; VF_4: Found an estimated cost of 28 for VF 4 For instruction: %tmp3 = load half, half* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load half, half* %tmp1, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For 
instruction: %tmp5 = load half, half* %tmp2, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store half 0xH0000, half* %tmp2, align 2 -; VF_8-LABEL: Checking a loop in "f16_factor_3" +; VF_8-LABEL: Checking a loop in 'f16_factor_3' ; VF_8: Found an estimated cost of 56 for VF 8 For instruction: %tmp3 = load half, half* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load half, half* %tmp1, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load half, half* %tmp2, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store half 0xH0000, half* %tmp2, align 2 -; VF_16-LABEL: Checking a loop in "f16_factor_3" +; VF_16-LABEL: Checking a loop in 'f16_factor_3' ; VF_16: Found an estimated cost of 112 for VF 16 For instruction: %tmp3 = load half, half* %tmp0, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load half, half* %tmp1, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load half, half* %tmp2, align 2 @@ -565,28 +565,28 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "f32_factor_3" +; VF_2-LABEL: Checking a loop in 'f32_factor_3' ; VF_2: Found an estimated cost of 16 for VF 2 For instruction: %tmp3 = load float, float* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load float, float* %tmp1, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load 
float, float* %tmp2, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp1, align 4 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store float 0.000000e+00, float* %tmp2, align 4 -; VF_4-LABEL: Checking a loop in "f32_factor_3" +; VF_4-LABEL: Checking a loop in 'f32_factor_3' ; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load float, float* %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load float, float* %tmp1, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load float, float* %tmp2, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp1, align 4 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store float 0.000000e+00, float* %tmp2, align 4 -; VF_8-LABEL: Checking a loop in "f32_factor_3" +; VF_8-LABEL: Checking a loop in 'f32_factor_3' ; VF_8: Found an estimated cost of 64 for VF 8 For instruction: %tmp3 = load float, float* %tmp0, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load float, float* %tmp1, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load float, float* %tmp2, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp0, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp1, align 4 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store float 0.000000e+00, float* %tmp2, align 4 -; VF_16-LABEL: Checking a loop in "f32_factor_3" +; VF_16-LABEL: Checking 
a loop in 'f32_factor_3' ; VF_16: Found an estimated cost of 128 for VF 16 For instruction: %tmp3 = load float, float* %tmp0, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load float, float* %tmp1, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load float, float* %tmp2, align 4 @@ -617,28 +617,28 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "f64_factor_3" +; VF_2-LABEL: Checking a loop in 'f64_factor_3' ; VF_2: Found an estimated cost of 18 for VF 2 For instruction: %tmp3 = load double, double* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load double, double* %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load double, double* %tmp2, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store double 0.000000e+00, double* %tmp2, align 8 -; VF_4-LABEL: Checking a loop in "f64_factor_3" +; VF_4-LABEL: Checking a loop in 'f64_factor_3' ; VF_4: Found an estimated cost of 36 for VF 4 For instruction: %tmp3 = load double, double* %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load double, double* %tmp1, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load double, double* %tmp2, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp1, align 8 ; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store double 0.000000e+00, double* %tmp2, align 8 -; VF_8-LABEL: Checking 
a loop in "f64_factor_3" +; VF_8-LABEL: Checking a loop in 'f64_factor_3' ; VF_8: Found an estimated cost of 72 for VF 8 For instruction: %tmp3 = load double, double* %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load double, double* %tmp1, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load double, double* %tmp2, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp1, align 8 ; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store double 0.000000e+00, double* %tmp2, align 8 -; VF_16-LABEL: Checking a loop in "f64_factor_3" +; VF_16-LABEL: Checking a loop in 'f64_factor_3' ; VF_16: Found an estimated cost of 144 for VF 16 For instruction: %tmp3 = load double, double* %tmp0, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load double, double* %tmp1, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load double, double* %tmp2, align 8 @@ -672,7 +672,7 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "i8_factor_4" +; VF_2-LABEL: Checking a loop in 'i8_factor_4' ; VF_2: Found an estimated cost of 48 for VF 2 For instruction: %tmp4 = load i8, i8* %tmp0, align 1 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i8, i8* %tmp1, align 1 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i8, i8* %tmp2, align 1 @@ -681,7 +681,7 @@ ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp1, align 1 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 0, i8* %tmp2, align 1 ; VF_2-NEXT: Found an estimated cost of 16 for VF 2 For instruction: store i8 0, i8* %tmp3, align 1 -; VF_4-LABEL: Checking a loop 
in "i8_factor_4" +; VF_4-LABEL: Checking a loop in 'i8_factor_4' ; VF_4: Found an estimated cost of 96 for VF 4 For instruction: %tmp4 = load i8, i8* %tmp0, align 1 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i8, i8* %tmp1, align 1 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i8, i8* %tmp2, align 1 @@ -690,7 +690,7 @@ ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp1, align 1 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 0, i8* %tmp2, align 1 ; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: store i8 0, i8* %tmp3, align 1 -; VF_8-LABEL: Checking a loop in "i8_factor_4" +; VF_8-LABEL: Checking a loop in 'i8_factor_4' ; VF_8: Found an estimated cost of 192 for VF 8 For instruction: %tmp4 = load i8, i8* %tmp0, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i8, i8* %tmp1, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i8, i8* %tmp2, align 1 @@ -699,7 +699,7 @@ ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp1, align 1 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 0, i8* %tmp2, align 1 ; VF_8-NEXT: Found an estimated cost of 64 for VF 8 For instruction: store i8 0, i8* %tmp3, align 1 -; VF_16-LABEL: Checking a loop in "i8_factor_4" +; VF_16-LABEL: Checking a loop in 'i8_factor_4' ; VF_16: Found an estimated cost of 384 for VF 16 For instruction: %tmp4 = load i8, i8* %tmp0, align 1 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i8, i8* %tmp1, align 1 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i8, i8* %tmp2, align 1 @@ -735,7 +735,7 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "i16_factor_4" +; VF_2-LABEL: Checking a loop in 'i16_factor_4' ; VF_2: Found an estimated 
cost of 48 for VF 2 For instruction: %tmp4 = load i16, i16* %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i16, i16* %tmp1, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i16, i16* %tmp2, align 2 @@ -744,7 +744,7 @@ ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp1, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 0, i16* %tmp2, align 2 ; VF_2-NEXT: Found an estimated cost of 16 for VF 2 For instruction: store i16 0, i16* %tmp3, align 2 -; VF_4-LABEL: Checking a loop in "i16_factor_4" +; VF_4-LABEL: Checking a loop in 'i16_factor_4' ; VF_4: Found an estimated cost of 96 for VF 4 For instruction: %tmp4 = load i16, i16* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i16, i16* %tmp1, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i16, i16* %tmp2, align 2 @@ -753,7 +753,7 @@ ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp1, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 0, i16* %tmp2, align 2 ; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: store i16 0, i16* %tmp3, align 2 -; VF_8-LABEL: Checking a loop in "i16_factor_4" +; VF_8-LABEL: Checking a loop in 'i16_factor_4' ; VF_8: Found an estimated cost of 192 for VF 8 For instruction: %tmp4 = load i16, i16* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i16, i16* %tmp1, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i16, i16* %tmp2, align 2 @@ -762,7 +762,7 @@ ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp1, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 0, i16* %tmp2, align 2 ; VF_8-NEXT: 
Found an estimated cost of 64 for VF 8 For instruction: store i16 0, i16* %tmp3, align 2 -; VF_16-LABEL: Checking a loop in "i16_factor_4" +; VF_16-LABEL: Checking a loop in 'i16_factor_4' ; VF_16: Found an estimated cost of 384 for VF 16 For instruction: %tmp4 = load i16, i16* %tmp0, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i16, i16* %tmp1, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i16, i16* %tmp2, align 2 @@ -798,7 +798,7 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "i32_factor_4" +; VF_2-LABEL: Checking a loop in 'i32_factor_4' ; VF_2: Found an estimated cost of 48 for VF 2 For instruction: %tmp4 = load i32, i32* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i32, i32* %tmp1, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i32, i32* %tmp2, align 4 @@ -807,7 +807,7 @@ ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp1, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 0, i32* %tmp2, align 4 ; VF_2-NEXT: Found an estimated cost of 16 for VF 2 For instruction: store i32 0, i32* %tmp3, align 4 -; VF_4-LABEL: Checking a loop in "i32_factor_4" +; VF_4-LABEL: Checking a loop in 'i32_factor_4' ; VF_4: Found an estimated cost of 32 for VF 4 For instruction: %tmp4 = load i32, i32* %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i32, i32* %tmp1, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i32, i32* %tmp2, align 4 @@ -816,7 +816,7 @@ ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp1, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 0, i32* %tmp2, align 4 ; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: 
store i32 0, i32* %tmp3, align 4 -; VF_8-LABEL: Checking a loop in "i32_factor_4" +; VF_8-LABEL: Checking a loop in 'i32_factor_4' ; VF_8: Found an estimated cost of 192 for VF 8 For instruction: %tmp4 = load i32, i32* %tmp0, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i32, i32* %tmp1, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i32, i32* %tmp2, align 4 @@ -825,7 +825,7 @@ ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp1, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 0, i32* %tmp2, align 4 ; VF_8-NEXT: Found an estimated cost of 64 for VF 8 For instruction: store i32 0, i32* %tmp3, align 4 -; VF_16-LABEL: Checking a loop in "i32_factor_4" +; VF_16-LABEL: Checking a loop in 'i32_factor_4' ; VF_16: Found an estimated cost of 384 for VF 16 For instruction: %tmp4 = load i32, i32* %tmp0, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i32, i32* %tmp1, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i32, i32* %tmp2, align 4 @@ -861,7 +861,7 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "i64_factor_4" +; VF_2-LABEL: Checking a loop in 'i64_factor_4' ; VF_2: Found an estimated cost of 88 for VF 2 For instruction: %tmp4 = load i64, i64* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i64, i64* %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i64, i64* %tmp2, align 8 @@ -870,7 +870,7 @@ ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 0, i64* %tmp2, align 8 ; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store i64 0, i64* %tmp3, align 8 -; VF_4-LABEL: Checking a 
loop in "i64_factor_4" +; VF_4-LABEL: Checking a loop in 'i64_factor_4' ; VF_4: Found an estimated cost of 176 for VF 4 For instruction: %tmp4 = load i64, i64* %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i64, i64* %tmp1, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i64, i64* %tmp2, align 8 @@ -879,7 +879,7 @@ ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp1, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 0, i64* %tmp2, align 8 ; VF_4-NEXT: Found an estimated cost of 48 for VF 4 For instruction: store i64 0, i64* %tmp3, align 8 -; VF_8-LABEL: Checking a loop in "i64_factor_4" +; VF_8-LABEL: Checking a loop in 'i64_factor_4' ; VF_8: Found an estimated cost of 352 for VF 8 For instruction: %tmp4 = load i64, i64* %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i64, i64* %tmp1, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i64, i64* %tmp2, align 8 @@ -888,7 +888,7 @@ ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp1, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 0, i64* %tmp2, align 8 ; VF_8-NEXT: Found an estimated cost of 96 for VF 8 For instruction: store i64 0, i64* %tmp3, align 8 -; VF_16-LABEL: Checking a loop in "i64_factor_4" +; VF_16-LABEL: Checking a loop in 'i64_factor_4' ; VF_16: Found an estimated cost of 704 for VF 16 For instruction: %tmp4 = load i64, i64* %tmp0, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i64, i64* %tmp1, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i64, i64* %tmp2, align 8 @@ -924,7 +924,7 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "f16_factor_4" +; VF_2-LABEL: Checking a loop in 
'f16_factor_4' ; VF_2: Found an estimated cost of 18 for VF 2 For instruction: %tmp4 = load half, half* %tmp0, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load half, half* %tmp1, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load half, half* %tmp2, align 2 @@ -933,7 +933,7 @@ ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp1, align 2 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, half* %tmp2, align 2 ; VF_2-NEXT: Found an estimated cost of 16 for VF 2 For instruction: store half 0xH0000, half* %tmp3, align 2 -; VF_4-LABEL: Checking a loop in "f16_factor_4" +; VF_4-LABEL: Checking a loop in 'f16_factor_4' ; VF_4: Found an estimated cost of 36 for VF 4 For instruction: %tmp4 = load half, half* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load half, half* %tmp1, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load half, half* %tmp2, align 2 @@ -942,7 +942,7 @@ ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp2, align 2 ; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: store half 0xH0000, half* %tmp3, align 2 -; VF_8-LABEL: Checking a loop in "f16_factor_4" +; VF_8-LABEL: Checking a loop in 'f16_factor_4' ; VF_8: Found an estimated cost of 72 for VF 8 For instruction: %tmp4 = load half, half* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load half, half* %tmp1, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load half, half* %tmp2, align 2 @@ -951,7 +951,7 @@ ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2 ; 
VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp2, align 2 ; VF_8-NEXT: Found an estimated cost of 64 for VF 8 For instruction: store half 0xH0000, half* %tmp3, align 2 -; VF_16-LABEL: Checking a loop in "f16_factor_4" +; VF_16-LABEL: Checking a loop in 'f16_factor_4' ; VF_16: Found an estimated cost of 144 for VF 16 For instruction: %tmp4 = load half, half* %tmp0, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load half, half* %tmp1, align 2 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load half, half* %tmp2, align 2 @@ -987,7 +987,7 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "f32_factor_4" +; VF_2-LABEL: Checking a loop in 'f32_factor_4' ; VF_2: Found an estimated cost of 20 for VF 2 For instruction: %tmp4 = load float, float* %tmp0, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load float, float* %tmp1, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load float, float* %tmp2, align 4 @@ -996,7 +996,7 @@ ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp1, align 4 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, float* %tmp2, align 4 ; VF_2-NEXT: Found an estimated cost of 16 for VF 2 For instruction: store float 0.000000e+00, float* %tmp3, align 4 -; VF_4-LABEL: Checking a loop in "f32_factor_4" +; VF_4-LABEL: Checking a loop in 'f32_factor_4' ; VF_4: Found an estimated cost of 32 for VF 4 For instruction: %tmp4 = load float, float* %tmp0, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load float, float* %tmp1, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load float, float* %tmp2, align 4 @@ -1005,7 +1005,7 @@ ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: 
store float 0.000000e+00, float* %tmp1, align 4 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, float* %tmp2, align 4 ; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: store float 0.000000e+00, float* %tmp3, align 4 -; VF_8-LABEL: Checking a loop in "f32_factor_4" +; VF_8-LABEL: Checking a loop in 'f32_factor_4' ; VF_8: Found an estimated cost of 80 for VF 8 For instruction: %tmp4 = load float, float* %tmp0, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load float, float* %tmp1, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load float, float* %tmp2, align 4 @@ -1014,7 +1014,7 @@ ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp1, align 4 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, float* %tmp2, align 4 ; VF_8-NEXT: Found an estimated cost of 64 for VF 8 For instruction: store float 0.000000e+00, float* %tmp3, align 4 -; VF_16-LABEL: Checking a loop in "f32_factor_4" +; VF_16-LABEL: Checking a loop in 'f32_factor_4' ; VF_16: Found an estimated cost of 160 for VF 16 For instruction: %tmp4 = load float, float* %tmp0, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load float, float* %tmp1, align 4 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load float, float* %tmp2, align 4 @@ -1050,7 +1050,7 @@ entry: br label %for.body -; VF_2-LABEL: Checking a loop in "f64_factor_4" +; VF_2-LABEL: Checking a loop in 'f64_factor_4' ; VF_2: Found an estimated cost of 24 for VF 2 For instruction: %tmp4 = load double, double* %tmp0, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load double, double* %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load double, double* %tmp2, align 8 @@ -1059,7 
+1059,7 @@ ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp1, align 8 ; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, double* %tmp2, align 8 ; VF_2-NEXT: Found an estimated cost of 16 for VF 2 For instruction: store double 0.000000e+00, double* %tmp3, align 8 -; VF_4-LABEL: Checking a loop in "f64_factor_4" +; VF_4-LABEL: Checking a loop in 'f64_factor_4' ; VF_4: Found an estimated cost of 48 for VF 4 For instruction: %tmp4 = load double, double* %tmp0, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load double, double* %tmp1, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load double, double* %tmp2, align 8 @@ -1068,7 +1068,7 @@ ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp1, align 8 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, double* %tmp2, align 8 ; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: store double 0.000000e+00, double* %tmp3, align 8 -; VF_8-LABEL: Checking a loop in "f64_factor_4" +; VF_8-LABEL: Checking a loop in 'f64_factor_4' ; VF_8: Found an estimated cost of 96 for VF 8 For instruction: %tmp4 = load double, double* %tmp0, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load double, double* %tmp1, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load double, double* %tmp2, align 8 @@ -1077,7 +1077,7 @@ ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp1, align 8 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, double* %tmp2, align 8 ; VF_8-NEXT: Found an estimated cost of 64 for VF 8 For instruction: store double 0.000000e+00, double* %tmp3, align 8 -; VF_16-LABEL: 
Checking a loop in "f64_factor_4" +; VF_16-LABEL: Checking a loop in 'f64_factor_4' ; VF_16: Found an estimated cost of 192 for VF 16 For instruction: %tmp4 = load double, double* %tmp0, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load double, double* %tmp1, align 8 ; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load double, double* %tmp2, align 8 diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs-02.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs-02.ll --- a/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs-02.ll +++ b/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs-02.ll @@ -10,7 +10,7 @@ ; registers. Each of the 4 vector values must then be constructed from the ; two vector registers using one vperm each, which gives a cost of 2 + 4 = 6. ; -; CHECK: LV: Checking a loop in "fun0" +; CHECK: LV: Checking a loop in 'fun0' ; CHECK: LV: Found an estimated cost of 6 for VF 4 For instruction: %ld0 = load i16 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %ld1 = load i16 ; CHECK: LV: Found an estimated cost of 0 for VF 4 For instruction: %ld2 = load i16 @@ -48,7 +48,7 @@ ; 3 vector registers, and then constructing the vector value with two vperms, ; which gives a cost of 5. ; -; CHECK: LV: Checking a loop in "fun1" +; CHECK: LV: Checking a loop in 'fun1' ; CHECK: LV: Found an estimated cost of 5 for VF 16 For instruction: %ld0 = load i8 define void @fun1(i8 *%ptr, i8 *%dst) { entry: @@ -74,7 +74,7 @@ ; 32. At VF=2, this means loading 2 vector registers, and using 4 vperms to ; produce the vector values, which gives a cost of 6. 
; -; CHECK: LV: Checking a loop in "fun2" +; CHECK: LV: Checking a loop in 'fun2' ; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %ld0 = load i8 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %ld1 = load i8 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %ld2 = load i8 @@ -114,7 +114,7 @@ ; as in fun2, except the stride makes the second iterations values overlap a ; vector register boundary. ; -; CHECK: LV: Checking a loop in "fun3" +; CHECK: LV: Checking a loop in 'fun3' ; CHECK: LV: Found an estimated cost of 7 for VF 2 For instruction: %ld0 = load i8 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %ld1 = load i8 ; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %ld2 = load i8 diff --git a/llvm/test/Transforms/LoopVectorize/X86/already-vectorized.ll b/llvm/test/Transforms/LoopVectorize/X86/already-vectorized.ll --- a/llvm/test/Transforms/LoopVectorize/X86/already-vectorized.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/already-vectorized.ll @@ -12,7 +12,7 @@ ; Function Attrs: nounwind readonly uwtable define i32 @vect() { -; CHECK: LV: Checking a loop in "vect" +; CHECK: LV: Checking a loop in 'vect' entry: br label %for.body diff --git a/llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll b/llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll --- a/llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/reg-usage-debug.ll @@ -21,7 +21,7 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -; CHECK: LV: Checking a loop in "test_g" +; CHECK: LV: Checking a loop in 'test_g' ; CHECK: LV(REG): Found max usage: 2 item ; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers ; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers @@ -63,7 +63,7 @@ ret i32 %r.0.lcssa, !dbg !38 } -; CHECK: LV: Checking a loop in "test" +; CHECK: LV: 
Checking a loop in 'test' ; CHECK: LV(REG): Found max usage: 2 item ; CHECK-NEXT: LV(REG): RegisterClass: Generic::ScalarRC, 2 registers ; CHECK-NEXT: LV(REG): RegisterClass: Generic::VectorRC, 2 registers diff --git a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll --- a/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/uniformshift.ll @@ -1,10 +1,10 @@ ; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse2 -loop-vectorize -debug-only=loop-vectorize -S < %s 2>&1 | FileCheck %s ; REQUIRES: asserts -; CHECK: "foo" +; CHECK: 'foo' ; CHECK: LV: Found an estimated cost of 1 for VF 4 For instruction: %shift = ashr i32 %val, %k define void @foo(i32* nocapture %p, i32 %k) local_unnamed_addr #0 { -entry: +entry: br label %body body: diff --git a/llvm/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll --- a/llvm/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll @@ -9,7 +9,7 @@ @b = common global [2048 x i32] zeroinitializer, align 16 @c = common global [2048 x i32] zeroinitializer, align 16 -; CHECK: Checking a loop in "scalarselect" +; CHECK: Checking a loop in 'scalarselect' define void @scalarselect(i1 %cond) { br label %1 @@ -36,7 +36,7 @@ ret void } -; CHECK: Checking a loop in "vectorselect" +; CHECK: Checking a loop in 'vectorselect' define void @vectorselect(i1 %cond) { br label %1 diff --git a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll --- a/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll +++ b/llvm/test/Transforms/LoopVectorize/icmp-uniforms.ll @@ -35,7 +35,7 @@ } ; Check for crash exposed by D76992. 
-; CHECK-LABEL: "test" +; CHECK-LABEL: 'test' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-masked-group.ll --- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses-masked-group.ll +++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses-masked-group.ll @@ -30,11 +30,11 @@ ;} -; STRIDED_UNMASKED: LV: Checking a loop in "masked_strided1" +; STRIDED_UNMASKED: LV: Checking a loop in 'masked_strided1' ; STRIDED_UNMASKED: LV: Analyzing interleaved accesses... ; STRIDED_UNMASKED-NOT: LV: Creating an interleave group -; STRIDED_MASKED: LV: Checking a loop in "masked_strided1" +; STRIDED_MASKED: LV: Checking a loop in 'masked_strided1' ; STRIDED_MASKED: LV: Analyzing interleaved accesses... ; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 %{{.*}}, i8* %{{.*}}, align 1 ; STRIDED_MASKED-NEXT: LV: Inserted: store i8 %{{.*}}, i8* %{{.*}}, align 1 @@ -63,13 +63,13 @@ ; } ;} -; STRIDED_UNMASKED: LV: Checking a loop in "masked_strided2" +; STRIDED_UNMASKED: LV: Checking a loop in 'masked_strided2' ; STRIDED_UNMASKED: LV: Analyzing interleaved accesses... ; STRIDED_UNMASKED-NEXT: LV: Creating an interleave group with: store i8 1, i8* %{{.*}}, align 1 ; STRIDED_UNMASKED-NEXT: LV: Invalidate candidate interleaved store group due to gaps. ; STRIDED_UNMASKED-NOT: LV: Creating an interleave group -; STRIDED_MASKED: LV: Checking a loop in "masked_strided2" +; STRIDED_MASKED: LV: Checking a loop in 'masked_strided2' ; STRIDED_MASKED: LV: Analyzing interleaved accesses... 
; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 2, i8* %{{.*}}, align 1 ; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 1, i8* %{{.*}}, align 1 @@ -97,11 +97,11 @@ ;} -; STRIDED_UNMASKED: LV: Checking a loop in "masked_strided3" +; STRIDED_UNMASKED: LV: Checking a loop in 'masked_strided3' ; STRIDED_UNMASKED: LV: Analyzing interleaved accesses... ; STRIDED_UNMASKED-NOT: LV: Creating an interleave group -; STRIDED_MASKED: LV: Checking a loop in "masked_strided3" +; STRIDED_MASKED: LV: Checking a loop in 'masked_strided3' ; STRIDED_MASKED: LV: Analyzing interleaved accesses... ; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 2, i8* %{{.*}}, align 1 ; STRIDED_MASKED-NEXT: LV: Creating an interleave group with: store i8 1, i8* %{{.*}}, align 1 diff --git a/llvm/test/Transforms/LoopVectorize/loop-legality-checks.ll b/llvm/test/Transforms/LoopVectorize/loop-legality-checks.ll --- a/llvm/test/Transforms/LoopVectorize/loop-legality-checks.ll +++ b/llvm/test/Transforms/LoopVectorize/loop-legality-checks.ll @@ -2,7 +2,7 @@ ; REQUIRES: asserts ; Make sure LV legal bails out when there is a non-int, non-ptr phi -; CHECK-LABEL: "invalid_phi_types" +; CHECK-LABEL: 'invalid_phi_types' ; CHECK: LV: Not vectorizing: Found a non-int non-pointer PHI. define i32 @invalid_phi_types() { entry: @@ -21,7 +21,7 @@ ; D40973 ; Make sure LV legal bails out when the loop doesn't have a legal pre-header. -; CHECK-LABEL: "inc" +; CHECK-LABEL: 'inc' ; CHECK: LV: Not vectorizing: Loop doesn't have a legal pre-header. 
define void @inc(i32 %n, i8* %P) { %1 = icmp sgt i32 %n, 0 diff --git a/llvm/test/Transforms/LoopVectorize/nounroll.ll b/llvm/test/Transforms/LoopVectorize/nounroll.ll --- a/llvm/test/Transforms/LoopVectorize/nounroll.ll +++ b/llvm/test/Transforms/LoopVectorize/nounroll.ll @@ -3,7 +3,7 @@ target datalayout = "e-m:e-i64:64-n32:64-S128-v256:256:256-v512:512:512" -; CHECK: LV: Checking a loop in "f1" +; CHECK: LV: Checking a loop in 'f1' ; CHECK: LV: Loop hints: force=? width=0 interleave=1 define dso_local void @f1(i32 signext %n, i32* %A) { entry: @@ -30,7 +30,7 @@ ret void } -; CHECK: LV: Checking a loop in "f2" +; CHECK: LV: Checking a loop in 'f2' ; CHECK: LV: Loop hints: force=? width=0 interleave=4 define dso_local void @f2(i32 signext %n, i32* %A) { entry: @@ -57,7 +57,7 @@ ret void } -; CHECK: LV: Checking a loop in "f3" +; CHECK: LV: Checking a loop in 'f3' ; CHECK: LV: Loop hints: force=? width=0 interleave=1 define dso_local void @f3(i32 signext %n, i32* %A) { entry: diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-limitations.ll @@ -4,7 +4,7 @@ target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512" ; Currently we cannot handle live-out variables that are recurrences. -; CHECK: LV: Checking a loop in "f2" +; CHECK: LV: Checking a loop in 'f2' ; CHECK: LEV: Unable to vectorize epilogue because the loop is not a supported candidate. define signext i32 @f2(i8* noalias %A, i32 signext %n) { @@ -38,7 +38,7 @@ } ; Currently we cannot handle widended/truncated inductions. -; CHECK: LV: Checking a loop in "f3" +; CHECK: LV: Checking a loop in 'f3' ; CHECK: LEV: Unable to vectorize epilogue because the loop is not a supported candidate. 
define void @f3(i8* noalias %A, i32 signext %n) { diff --git a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll --- a/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/optimal-epilog-vectorization-scalable.ll @@ -4,7 +4,7 @@ target datalayout = "e-m:e-i64:64-n32:64-v256:256:256-v512:512:512" ; Currently we cannot handle scalable vectorization factors. -; CHECK: LV: Checking a loop in "f1" +; CHECK: LV: Checking a loop in 'f1' ; CHECK: LEV: Epilogue vectorization factor is forced. ; CHECK: Epilogue Loop VF:2, Epilogue Loop UF:1 diff --git a/llvm/test/Transforms/LoopVectorize/pr39099.ll b/llvm/test/Transforms/LoopVectorize/pr39099.ll --- a/llvm/test/Transforms/LoopVectorize/pr39099.ll +++ b/llvm/test/Transforms/LoopVectorize/pr39099.ll @@ -6,7 +6,7 @@ ; Ensure that we don't create interleave groups for predicated ; strided accesses. -; CHECK: LV: Checking a loop in "masked_strided" +; CHECK: LV: Checking a loop in 'masked_strided' ; CHECK: LV: Analyzing interleaved accesses... 
; CHECK-NOT: LV: Creating an interleave group diff --git a/llvm/test/Transforms/LoopVectorize/vect.omp.persistence.ll b/llvm/test/Transforms/LoopVectorize/vect.omp.persistence.ll --- a/llvm/test/Transforms/LoopVectorize/vect.omp.persistence.ll +++ b/llvm/test/Transforms/LoopVectorize/vect.omp.persistence.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -O2 -force-vector-interleave=2 -force-vector-width=4 -debug-only=loop-vectorize -S 2>&1 | FileCheck %s ; REQUIRES: asserts -; CHECK: LV: Checking a loop in "foo" +; CHECK: LV: Checking a loop in 'foo' ; CHECK: LV: Loop hints: force=enabled target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-printing.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-printing.ll @@ -7,7 +7,7 @@ ; Tests for printing VPlans. 
define void @print_call_and_memory(i64 %n, float* noalias %y, float* noalias %x) nounwind uwtable { -; CHECK-LABEL: Checking a loop in "print_call_and_memory" +; CHECK-LABEL: Checking a loop in 'print_call_and_memory' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: @@ -47,7 +47,7 @@ } define void @print_widen_gep_and_select(i64 %n, float* noalias %y, float* noalias %x, float* %z) nounwind uwtable { -; CHECK-LABEL: Checking a loop in "print_widen_gep_and_select" +; CHECK-LABEL: Checking a loop in 'print_widen_gep_and_select' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: @@ -91,7 +91,7 @@ } define float @print_reduction(i64 %n, float* noalias %y) { -; CHECK-LABEL: Checking a loop in "print_reduction" +; CHECK-LABEL: Checking a loop in 'print_reduction' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: @@ -128,7 +128,7 @@ } define void @print_replicate_predicated_phi(i64 %n, i64* %x) { -; CHECK-LABEL: Checking a loop in "print_replicate_predicated_phi" +; CHECK-LABEL: Checking a loop in 'print_replicate_predicated_phi' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: @@ -201,7 +201,7 @@ @CD = common global [1024 x i32] zeroinitializer, align 4 define void @print_interleave_groups(i32 %C, i32 %D) { -; CHECK-LABEL: Checking a loop in "print_interleave_groups" +; CHECK-LABEL: Checking a loop in 'print_interleave_groups' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: @@ -261,7 +261,7 @@ } define float @print_fmuladd_strict(float* %a, float* %b, i64 %n) { -; CHECK-LABEL: Checking a loop in "print_fmuladd_strict" +; CHECK-LABEL: Checking a loop in 'print_fmuladd_strict' ; CHECK: VPlan 
'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: @@ -301,7 +301,7 @@ } define void @debug_loc_vpinstruction(i32* nocapture %asd, i32* nocapture %bsd) !dbg !5 { -; CHECK-LABEL: Checking a loop in "debug_loc_vpinstruction" +; CHECK-LABEL: Checking a loop in 'debug_loc_vpinstruction' ; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll --- a/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll +++ b/llvm/test/Transforms/LoopVectorize/vplan-sink-scalars-and-merge.ll @@ -9,7 +9,7 @@ @c = common global [2048 x i32] zeroinitializer, align 16 -; CHECK-LABEL: LV: Checking a loop in "sink1" +; CHECK-LABEL: LV: Checking a loop in 'sink1' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: @@ -75,7 +75,7 @@ ret void } -; CHECK-LABEL: LV: Checking a loop in "sink2" +; CHECK-LABEL: LV: Checking a loop in 'sink2' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: @@ -154,7 +154,7 @@ ret void } -; CHECK-LABEL: LV: Checking a loop in "sink3" +; CHECK-LABEL: LV: Checking a loop in 'sink3' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: @@ -235,7 +235,7 @@ ; Make sure we do not sink uniform instructions. define void @uniform_gep(i64 %k, i16* noalias %A, i16* noalias %B) { -; CHECK-LABEL: LV: Checking a loop in "uniform_gep" +; CHECK-LABEL: LV: Checking a loop in 'uniform_gep' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: @@ -325,7 +325,7 @@ ; Loop with predicated load. 
define void @pred_cfg1(i32 %k, i32 %j) { -; CHECK-LABEL: LV: Checking a loop in "pred_cfg1" +; CHECK-LABEL: LV: Checking a loop in 'pred_cfg1' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: @@ -426,7 +426,7 @@ ; Loop with predicated load and store in separate blocks, store depends on ; loaded value. define void @pred_cfg2(i32 %k, i32 %j) { -; CHECK-LABEL: LV: Checking a loop in "pred_cfg2" +; CHECK-LABEL: LV: Checking a loop in 'pred_cfg2' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: @@ -542,7 +542,7 @@ ; Loop with predicated load and store in separate blocks, store does not depend ; on loaded value. define void @pred_cfg3(i32 %k, i32 %j) { -; CHECK-LABEL: LV: Checking a loop in "pred_cfg3" +; CHECK-LABEL: LV: Checking a loop in 'pred_cfg3' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: @@ -652,7 +652,7 @@ } define void @merge_3_replicate_region(i32 %k, i32 %j) { -; CHECK-LABEL: LV: Checking a loop in "merge_3_replicate_region" +; CHECK-LABEL: LV: Checking a loop in 'merge_3_replicate_region' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: @@ -767,7 +767,7 @@ define void @update_2_uses_in_same_recipe_in_merged_block(i32 %k) { -; CHECK-LABEL: LV: Checking a loop in "update_2_uses_in_same_recipe_in_merged_block" +; CHECK-LABEL: LV: Checking a loop in 'update_2_uses_in_same_recipe_in_merged_block' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: @@ -834,7 +834,7 @@ } define void @recipe_in_merge_candidate_used_by_first_order_recurrence(i32 %k) { -; CHECK-LABEL: LV: Checking a loop in "recipe_in_merge_candidate_used_by_first_order_recurrence" +; CHECK-LABEL: LV: Checking a loop in 
'recipe_in_merge_candidate_used_by_first_order_recurrence' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: @@ -918,7 +918,7 @@ } define void @update_multiple_users(i16* noalias %src, i8* noalias %dst, i1 %c) { -; CHECK-LABEL: LV: Checking a loop in "update_multiple_users" +; CHECK-LABEL: LV: Checking a loop in 'update_multiple_users' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: @@ -989,7 +989,7 @@ } define void @sinking_requires_duplication(float* %addr) { -; CHECK-LABEL: LV: Checking a loop in "sinking_requires_duplication" +; CHECK-LABEL: LV: Checking a loop in 'sinking_requires_duplication' ; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' { ; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count ; CHECK-EMPTY: diff --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll --- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll +++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll @@ -974,12 +974,13 @@ ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef @__omp_outlined__2_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x 
i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef @__omp_outlined__2_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) ; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] -; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0) ; AMDGPU-NEXT: ret void ; ; @@ -1106,9 +1107,10 @@ ; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]] ; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]] ; AMDGPU-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: 
[[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) ; AMDGPU-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] ; AMDGPU-NEXT: ret void ; @@ -1251,12 +1253,13 @@ ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) ; AMDGPU-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] -; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 
[[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef @__omp_outlined__8_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef @__omp_outlined__8_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0) ; AMDGPU-NEXT: ret void ; ; @@ -1375,12 +1378,13 @@ ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef @__omp_outlined__10_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef @__omp_outlined__10_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) ; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-NEXT: call 
void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef @__omp_outlined__11_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef @__omp_outlined__11_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0) ; AMDGPU-NEXT: ret void ; ; @@ -1500,11 +1504,12 @@ ; AMDGPU-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; AMDGPU-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; AMDGPU-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef @__omp_outlined__13_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) -; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef @__omp_outlined__14_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; AMDGPU-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-NEXT: call void 
@__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef @__omp_outlined__13_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; AMDGPU-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef @__omp_outlined__14_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0) ; AMDGPU-NEXT: ret void ; ; @@ -1626,7 +1631,8 @@ ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0 +; AMDGPU-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; AMDGPU-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 ; AMDGPU-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; AMDGPU: if.then: ; AMDGPU-NEXT: br label [[RETURN:%.*]] @@ -1949,12 +1955,13 @@ ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef @__omp_outlined__2_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x 
i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef @__omp_outlined__2_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) ; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] -; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef @__omp_outlined__3_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0) ; NVPTX-NEXT: ret void ; ; @@ -2080,9 +2087,10 @@ ; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]] ; NVPTX-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]] ; NVPTX-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP1:%.*]] = load 
i32, i32* [[TMP0]], align 4 +; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef @__omp_outlined__5_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) ; NVPTX-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] ; NVPTX-NEXT: ret void ; @@ -2224,12 +2232,13 @@ ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef @__omp_outlined__7_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) ; NVPTX-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] -; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef 
-1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef @__omp_outlined__8_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef @__omp_outlined__8_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0) ; NVPTX-NEXT: ret void ; ; @@ -2347,12 +2356,13 @@ ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef @__omp_outlined__10_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef @__omp_outlined__10_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) ; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef 
@[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef @__omp_outlined__11_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef @__omp_outlined__11_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0) ; NVPTX-NEXT: ret void ; ; @@ -2471,11 +2481,12 @@ ; NVPTX-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; NVPTX-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; NVPTX-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef @__omp_outlined__13_wrapper.ID, i8** noundef [[TMP1]], i64 noundef 0) -; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef @__omp_outlined__14_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; NVPTX-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 
[[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef @__omp_outlined__13_wrapper.ID, i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; NVPTX-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef @__omp_outlined__14_wrapper.ID, i8** noundef [[TMP3]], i64 noundef 0) ; NVPTX-NEXT: ret void ; ; @@ -2596,7 +2607,8 @@ ; NVPTX-NEXT: entry: ; NVPTX-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; NVPTX-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0 +; NVPTX-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; NVPTX-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 ; NVPTX-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; NVPTX: if.then: ; NVPTX-NEXT: br label [[RETURN:%.*]] @@ -2878,12 +2890,13 @@ ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; AMDGPU-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; 
AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) ; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] -; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0) ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -2963,9 +2976,10 @@ ; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] ; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 
noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) ; AMDGPU-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] ; AMDGPU-DISABLED-NEXT: ret void ; @@ -3065,12 +3079,13 @@ ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; AMDGPU-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-DISABLED-NEXT: call void 
@__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) ; AMDGPU-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] -; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0) ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -3148,12 +3163,13 @@ ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; AMDGPU-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to 
i8*), i8** noundef [[TMP1]], i64 noundef 0) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) ; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0) ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -3232,11 +3248,12 @@ ; AMDGPU-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; AMDGPU-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; AMDGPU-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* 
[[CAPTURED_VARS_ADDRS]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) -; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; AMDGPU-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; AMDGPU-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; AMDGPU-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; AMDGPU-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0) ; AMDGPU-DISABLED-NEXT: ret void ; ; @@ -3321,7 +3338,8 @@ ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; AMDGPU-DISABLED-NEXT: store i32 
[[A]], i32* [[A_ADDR]], align 4 -; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0 +; AMDGPU-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; AMDGPU-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 ; AMDGPU-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; AMDGPU-DISABLED: if.then: ; AMDGPU-DISABLED-NEXT: br label [[RETURN:%.*]] @@ -3573,12 +3591,13 @@ ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; NVPTX-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) ; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] -; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef 
@[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0) ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -3658,9 +3677,10 @@ ; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] ; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR8]] ; NVPTX-DISABLED-NEXT: call void @no_parallel_region_in_here.internalized() #[[ATTR8]] -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, 
i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) ; NVPTX-DISABLED-NEXT: call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR8]] ; NVPTX-DISABLED-NEXT: ret void ; @@ -3760,12 +3780,13 @@ ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; NVPTX-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) ; NVPTX-DISABLED-NEXT: [[CALL:%.*]] = call i32 @unknown() #[[ATTR10]] -; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef bitcast (void (i16, i32)* 
@__omp_outlined__8_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0) ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -3843,12 +3864,13 @@ ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; NVPTX-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) ; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; NVPTX-DISABLED-NEXT: 
[[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0) ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -3927,11 +3949,12 @@ ; NVPTX-DISABLED-NEXT: [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8 ; NVPTX-DISABLED-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 ; NVPTX-DISABLED-NEXT: call void @unknown_no_openmp() #[[ATTR9]] -; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4 -; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0) -; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** -; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef bitcast (void (i16, i32)* 
@__omp_outlined__14_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +; NVPTX-DISABLED-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +; NVPTX-DISABLED-NEXT: [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0) +; NVPTX-DISABLED-NEXT: [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8** +; NVPTX-DISABLED-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0) ; NVPTX-DISABLED-NEXT: ret void ; ; @@ -4016,7 +4039,8 @@ ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 ; NVPTX-DISABLED-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[A]], 0 +; NVPTX-DISABLED-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 +; NVPTX-DISABLED-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0 ; NVPTX-DISABLED-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]] ; NVPTX-DISABLED: if.then: ; NVPTX-DISABLED-NEXT: br label [[RETURN:%.*]] diff --git a/llvm/test/Transforms/OpenMP/replace_globalization.ll b/llvm/test/Transforms/OpenMP/replace_globalization.ll --- a/llvm/test/Transforms/OpenMP/replace_globalization.ll +++ b/llvm/test/Transforms/OpenMP/replace_globalization.ll @@ -84,7 +84,15 @@ ret void } -declare i8* @__kmpc_alloc_shared(i64) +@offset =global i32 undef +@stack = 
internal addrspace(3) global [1024 x i8] undef +define private i8* @__kmpc_alloc_shared(i64) { + %bc = bitcast [1024 x i8] addrspace(3) * @stack to i8 addrspace(3) * + %ac = addrspacecast i8 addrspace(3) * %bc to i8* + %l = load i32, i32* @offset + %gep = getelementptr i8, i8* %ac, i32 %l + ret i8* %gep +} declare void @__kmpc_free_shared(i8*, i64) @@ -122,33 +130,41 @@ ; CHECK: @[[S:[a-zA-Z0-9_$"\\.-]+]] = external local_unnamed_addr global i8* ; CHECK: @[[GLOB0:[0-9]+]] = private unnamed_addr constant [113 x i8] c" ; CHECK: @[[GLOB1:[0-9]+]] = private unnamed_addr constant [[STRUCT_IDENT_T:%.*]] { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([113 x i8], [113 x i8]* @[[GLOB0]], i32 0, i32 0) }, align 8 +; CHECK: @[[OFFSET:[a-zA-Z0-9_$"\\.-]+]] = global i32 undef +; CHECK: @[[STACK:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [1024 x i8] undef ; CHECK: @[[X_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [16 x i8] undef, align 4 ; CHECK: @[[Y_SHARED:[a-zA-Z0-9_$"\\.-]+]] = internal addrspace(3) global [4 x i8] undef, align 4 ;. 
; CHECK-LABEL: define {{[^@]+}}@foo() { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) -; CHECK-NEXT: [[X:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 4) #[[ATTR5:[0-9]+]] -; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR4:[0-9]+]] -; CHECK-NEXT: call void @use.internalized(i8* nofree align 4 [[X]]) #[[ATTR6:[0-9]+]] -; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR5]] +; CHECK-NEXT: [[X:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 noundef 4) #[[ATTR6:[0-9]+]] +; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR5:[0-9]+]] +; CHECK-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* [[X]] to i32* +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[X_ON_STACK]] to i8* +; CHECK-NEXT: call void @use.internalized(i8* nofree align 4 [[TMP0]]) #[[ATTR7:[0-9]+]] +; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[X]], i64 4) #[[ATTR8:[0-9]+]] ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) ; CHECK-NEXT: ret void ; ; ; CHECK-LABEL: define {{[^@]+}}@bar() { ; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 1, i1 false, i1 true) -; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR4]] +; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR5]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[C]], -1 ; CHECK-NEXT: br i1 [[CMP]], label [[MASTER1:%.*]], label [[EXIT:%.*]] ; CHECK: master1: -; CHECK-NEXT: call void @use.internalized(i8* nofree align 4 addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*)) #[[ATTR6]] +; CHECK-NEXT: [[X_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([16 x i8], [16 x i8] addrspace(3)* @x_shared, i32 0, i32 0) to i8*) to [4 x i32]* +; CHECK-NEXT: [[A0:%.*]] = bitcast [4 x i32]* [[X_ON_STACK]] to i8* +; CHECK-NEXT: call void @use.internalized(i8* nofree align 4 [[A0]]) #[[ATTR7]] ; 
CHECK-NEXT: br label [[NEXT:%.*]] ; CHECK: next: -; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR4]] +; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR5]] ; CHECK-NEXT: br label [[MASTER2:%.*]] ; CHECK: master2: -; CHECK-NEXT: call void @use.internalized(i8* nofree align 4 addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y_shared, i32 0, i32 0) to i8*)) #[[ATTR6]] +; CHECK-NEXT: [[Y_ON_STACK:%.*]] = bitcast i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([4 x i8], [4 x i8] addrspace(3)* @y_shared, i32 0, i32 0) to i8*) to [4 x i32]* +; CHECK-NEXT: [[B1:%.*]] = bitcast [4 x i32]* [[Y_ON_STACK]] to i8* +; CHECK-NEXT: call void @use.internalized(i8* nofree align 4 [[B1]]) #[[ATTR7]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 1, i1 true) @@ -157,13 +173,15 @@ ; ; CHECK-LABEL: define {{[^@]+}}@baz_spmd() { ; CHECK-NEXT: [[C:%.*]] = call i32 @__kmpc_target_init(%struct.ident_t* @[[GLOB1]], i8 2, i1 true, i1 true) -; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR4]] +; CHECK-NEXT: call void @unknown_no_openmp() #[[ATTR5]] ; CHECK-NEXT: [[C0:%.*]] = icmp eq i32 [[C]], -1 ; CHECK-NEXT: br i1 [[C0]], label [[MASTER3:%.*]], label [[EXIT:%.*]] ; CHECK: master3: -; CHECK-NEXT: [[Z:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 24) #[[ATTR5]], !dbg [[DBG9:![0-9]+]] -; CHECK-NEXT: call void @use.internalized(i8* nofree writeonly align 4 [[Z]]) #[[ATTR6]] -; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[Z]], i64 24) #[[ATTR5]] +; CHECK-NEXT: [[Z:%.*]] = call align 4 i8* @__kmpc_alloc_shared(i64 noundef 24) #[[ATTR6]], !dbg [[DBG9:![0-9]+]] +; CHECK-NEXT: [[Z_ON_STACK:%.*]] = bitcast i8* [[Z]] to [6 x i32]* +; CHECK-NEXT: [[C1:%.*]] = bitcast [6 x i32]* [[Z_ON_STACK]] to i8* +; CHECK-NEXT: call void @use.internalized(i8* nofree writeonly align 4 [[C1]]) #[[ATTR7]] +; CHECK-NEXT: call void @__kmpc_free_shared(i8* [[Z]], i64 24) 
#[[ATTR8]] ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: ; CHECK-NEXT: call void @__kmpc_target_deinit(%struct.ident_t* @[[GLOB1]], i8 2, i1 true) @@ -184,14 +202,23 @@ ; CHECK-NEXT: store i8* [[X]], i8** @S, align 8 ; CHECK-NEXT: ret void ; +; +; CHECK-LABEL: define {{[^@]+}}@__kmpc_alloc_shared +; CHECK-SAME: (i64 [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[L:%.*]] = load i32, i32* @offset, align 4 +; CHECK-NEXT: [[GEP:%.*]] = getelementptr i8, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds ([1024 x i8], [1024 x i8] addrspace(3)* @stack, i32 0, i32 0) to i8*), i32 [[L]] +; CHECK-NEXT: ret i8* [[GEP]] +; ;. ; CHECK: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn writeonly } -; CHECK: attributes #[[ATTR1:[0-9]+]] = { nosync nounwind } -; CHECK: attributes #[[ATTR2:[0-9]+]] = { nounwind readnone speculatable } -; CHECK: attributes #[[ATTR3:[0-9]+]] = { nofree nosync nounwind readnone speculatable willreturn } -; CHECK: attributes #[[ATTR4]] = { "llvm.assume"="omp_no_openmp" } -; CHECK: attributes #[[ATTR5]] = { nounwind } -; CHECK: attributes #[[ATTR6]] = { nounwind writeonly } +; CHECK: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readonly willreturn allocsize(0) } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nosync nounwind } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind readnone speculatable } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { nofree nosync nounwind readnone speculatable willreturn } +; CHECK: attributes #[[ATTR5]] = { "llvm.assume"="omp_no_openmp" } +; CHECK: attributes #[[ATTR6]] = { nounwind readonly } +; CHECK: attributes #[[ATTR7]] = { nounwind writeonly } +; CHECK: attributes #[[ATTR8]] = { nounwind } ;. 
; CHECK: [[META0:![0-9]+]] = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 12.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None) ; CHECK: [[META1:![0-9]+]] = !DIFile(filename: "replace_globalization.c", directory: "/tmp/replace_globalization.c") diff --git a/llvm/test/Transforms/OpenMP/spmdization.ll b/llvm/test/Transforms/OpenMP/spmdization.ll --- a/llvm/test/Transforms/OpenMP/spmdization.ll +++ b/llvm/test/Transforms/OpenMP/spmdization.ll @@ -2169,25 +2169,25 @@ ; Function Attrs: alwaysinline convergent nounwind define internal void @.omp_outlined.(i32 %.global_tid., i32* noalias %.part_id., i8* noalias %.privates., void (i8*, ...)* noalias %.copy_fn., i8* %.task_t., %struct.anon* noalias %__context) #9 { ; AMDGPU-LABEL: define {{[^@]+}}@.omp_outlined. -; AMDGPU-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone [[DOTPART_ID_:%.*]], i8* noalias nocapture nofree readnone align 4294967296 [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias nocapture nofree readnone [[DOTCOPY_FN_:%.*]], i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) [[DOTTASK_T_:%.*]], %struct.anon* noalias nocapture nofree readnone [[__CONTEXT:%.*]]) #[[ATTR0]] { +; AMDGPU-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone [[DOTPART_ID_:%.*]], i8* noalias nocapture nofree readnone align 4294967296 [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias nocapture nofree readnone align 4294967296 [[DOTCOPY_FN_:%.*]], i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) [[DOTTASK_T_:%.*]], %struct.anon* noalias nocapture nofree readnone [[__CONTEXT:%.*]]) #[[ATTR0]] { ; AMDGPU-NEXT: entry: ; AMDGPU-NEXT: call void @spmd_amenable() #[[ATTR5]] ; AMDGPU-NEXT: ret void ; ; NVPTX-LABEL: define {{[^@]+}}@.omp_outlined. 
-; NVPTX-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone [[DOTPART_ID_:%.*]], i8* noalias nocapture nofree readnone align 4294967296 [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias nocapture nofree readnone [[DOTCOPY_FN_:%.*]], i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) [[DOTTASK_T_:%.*]], %struct.anon* noalias nocapture nofree readnone [[__CONTEXT:%.*]]) #[[ATTR0]] { +; NVPTX-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone [[DOTPART_ID_:%.*]], i8* noalias nocapture nofree readnone align 4294967296 [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias nocapture nofree readnone align 4294967296 [[DOTCOPY_FN_:%.*]], i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) [[DOTTASK_T_:%.*]], %struct.anon* noalias nocapture nofree readnone [[__CONTEXT:%.*]]) #[[ATTR0]] { ; NVPTX-NEXT: entry: ; NVPTX-NEXT: call void @spmd_amenable() #[[ATTR5]] ; NVPTX-NEXT: ret void ; ; AMDGPU-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined. 
-; AMDGPU-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone [[DOTPART_ID_:%.*]], i8* noalias nocapture nofree readnone align 4294967296 [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias nocapture nofree readnone [[DOTCOPY_FN_:%.*]], i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) [[DOTTASK_T_:%.*]], %struct.anon* noalias nocapture nofree readnone [[__CONTEXT:%.*]]) #[[ATTR0]] { +; AMDGPU-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone [[DOTPART_ID_:%.*]], i8* noalias nocapture nofree readnone align 4294967296 [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias nocapture nofree readnone align 4294967296 [[DOTCOPY_FN_:%.*]], i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) [[DOTTASK_T_:%.*]], %struct.anon* noalias nocapture nofree readnone [[__CONTEXT:%.*]]) #[[ATTR0]] { ; AMDGPU-DISABLED-NEXT: entry: ; AMDGPU-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5]] ; AMDGPU-DISABLED-NEXT: ret void ; ; NVPTX-DISABLED-LABEL: define {{[^@]+}}@.omp_outlined. 
-; NVPTX-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone [[DOTPART_ID_:%.*]], i8* noalias nocapture nofree readnone align 4294967296 [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias nocapture nofree readnone [[DOTCOPY_FN_:%.*]], i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) [[DOTTASK_T_:%.*]], %struct.anon* noalias nocapture nofree readnone [[__CONTEXT:%.*]]) #[[ATTR0]] { +; NVPTX-DISABLED-SAME: (i32 [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree nonnull readnone [[DOTPART_ID_:%.*]], i8* noalias nocapture nofree readnone align 4294967296 [[DOTPRIVATES_:%.*]], void (i8*, ...)* noalias nocapture nofree readnone align 4294967296 [[DOTCOPY_FN_:%.*]], i8* noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) [[DOTTASK_T_:%.*]], %struct.anon* noalias nocapture nofree readnone [[__CONTEXT:%.*]]) #[[ATTR0]] { ; NVPTX-DISABLED-NEXT: entry: ; NVPTX-DISABLED-NEXT: call void @spmd_amenable() #[[ATTR5]] ; NVPTX-DISABLED-NEXT: ret void diff --git a/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/x86-filter.ll b/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/x86-filter.ll new file mode 100644 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/x86-filter.ll @@ -0,0 +1,9 @@ +; RUN: opt < %s -passes='print' -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 + +define void @replication_i64_stride2() nounwind { + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> + ret void +} diff --git a/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/x86-filter.ll.filter.expected 
b/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/x86-filter.ll.filter.expected new file mode 100644 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/x86-filter.ll.filter.expected @@ -0,0 +1,14 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --filter "(vf4|vf16)" +; RUN: opt < %s -passes='print' -mtriple=x86_64-pc-linux-gnu 2>&1 -disable-output -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 + +define void @replication_i64_stride2() nounwind { +; SSE2-LABEL: 'replication_i64_stride2' +; SSE2: Cost Model: Found an estimated cost of 18 for instruction: %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> +; SSE2: Cost Model: Found an estimated cost of 72 for instruction: %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> +; + %vf2 = shufflevector <2 x i64> undef, <2 x i64> poison, <4 x i32> + %vf4 = shufflevector <4 x i64> undef, <4 x i64> poison, <8 x i32> + %vf8 = shufflevector <8 x i64> undef, <8 x i64> poison, <16 x i32> + %vf16 = shufflevector <16 x i64> undef, <16 x i64> poison, <32 x i32> + ret void +} diff --git a/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/filter.test b/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/filter.test new file mode 100644 --- /dev/null +++ b/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/filter.test @@ -0,0 +1,13 @@ +# REQUIRES: x86-registered-target + +## Check that --filter works properly. 
+# RUN: cp -f %S/Inputs/x86-filter.ll %t.ll && %update_analyze_test_checks --filter="(vf4|vf16)" %t.ll +# RUN: diff -u %t.ll %S/Inputs/x86-filter.ll.filter.expected + +## Check that running the script again does not change the result: +# RUN: %update_analyze_test_checks --filter="(vf4|vf16)" %t.ll +# RUN: diff -u %t.ll %S/Inputs/x86-filter.ll.filter.expected + +## Check that running the script again, without arguments, does not change the result: +# RUN: %update_analyze_test_checks %t.ll +# RUN: diff -u %t.ll %S/Inputs/x86-filter.ll.filter.expected diff --git a/llvm/test/tools/llvm-objcopy/MachO/bitcode-strip-remove.test b/llvm/test/tools/llvm-objcopy/MachO/bitcode-strip-remove-nonempty-segment.test copy from llvm/test/tools/llvm-objcopy/MachO/bitcode-strip-remove.test copy to llvm/test/tools/llvm-objcopy/MachO/bitcode-strip-remove-nonempty-segment.test --- a/llvm/test/tools/llvm-objcopy/MachO/bitcode-strip-remove.test +++ b/llvm/test/tools/llvm-objcopy/MachO/bitcode-strip-remove-nonempty-segment.test @@ -1,7 +1,7 @@ -## Test bitcode section removal. +## Test bitcode segment is not removed when not empty. 
# RUN: yaml2obj %s -o %t # RUN: llvm-bitcode-strip -r %t -o %t2 -# RUN: llvm-readobj --sections %t2 | FileCheck --implicit-check-not=Name: %s +# RUN: llvm-readobj --macho-segment --sections %t2 | FileCheck --implicit-check-not=Name: %s # CHECK: Name: __text # CHECK-NEXT: Segment: __TEXT @@ -9,6 +9,12 @@ # CHECK-NEXT: Segment: __DATA # CHECK: Name: __notbundle # CHECK-NEXT: Segment: __LLVM +# CHECK: Cmd: LC_SEGMENT_64 +# CHECK-NEXT: Name: __TEXT +# CHECK: Cmd: LC_SEGMENT_64 +# CHECK-NEXT: Name: __DATA +# CHECK: Cmd: LC_SEGMENT_64 +# CHECK-NEXT: Name: __LLVM --- !mach-o FileHeader: @@ -16,21 +22,21 @@ cputype: 0x01000007 cpusubtype: 0x00000003 filetype: 0x00000001 - ncmds: 1 - sizeofcmds: 392 + ncmds: 3 + sizeofcmds: 536 flags: 0x00002000 reserved: 0x00000000 LoadCommands: - cmd: LC_SEGMENT_64 - cmdsize: 392 - segname: '' + cmdsize: 152 + segname: __TEXT vmaddr: 0 - vmsize: 16 - fileoff: 424 - filesize: 16 + vmsize: 4 + fileoff: 568 + filesize: 4 maxprot: 7 initprot: 7 - nsects: 4 + nsects: 1 flags: 0 Sections: - sectname: __text @@ -38,7 +44,7 @@ addr: 0x0000000000000000 content: 'AABBCCDD' size: 4 - offset: 424 + offset: 568 align: 0 reloff: 0x00000000 nreloc: 0 @@ -46,12 +52,24 @@ reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __DATA + vmaddr: 4 + vmsize: 4 + fileoff: 572 + filesize: 4 + maxprot: 7 + initprot: 7 + nsects: 1 + flags: 0 + Sections: - sectname: __bundle segname: __DATA addr: 0x0000000000000004 content: 'DDAADDAA' size: 4 - offset: 428 + offset: 572 align: 0 reloff: 0x00000000 nreloc: 0 @@ -59,12 +77,24 @@ reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 + - cmd: LC_SEGMENT_64 + cmdsize: 232 + segname: __LLVM + vmaddr: 8 + vmsize: 8 + fileoff: 576 + filesize: 8 + maxprot: 7 + initprot: 7 + nsects: 2 + flags: 0 + Sections: - sectname: __bundle segname: __LLVM addr: 0x0000000000000008 content: 'EEFFEEFF' size: 4 - offset: 432 + offset: 576 align: 0 reloff: 0x00000000 
nreloc: 0 @@ -77,7 +107,7 @@ addr: 0x0000000000000008 content: 'EEFFEEFF' size: 4 - offset: 436 + offset: 580 align: 0 reloff: 0x00000000 nreloc: 0 diff --git a/llvm/test/tools/llvm-objcopy/MachO/bitcode-strip-remove.test b/llvm/test/tools/llvm-objcopy/MachO/bitcode-strip-remove.test --- a/llvm/test/tools/llvm-objcopy/MachO/bitcode-strip-remove.test +++ b/llvm/test/tools/llvm-objcopy/MachO/bitcode-strip-remove.test @@ -1,14 +1,16 @@ -## Test bitcode section removal. +## Test bitcode section and segment removal. # RUN: yaml2obj %s -o %t # RUN: llvm-bitcode-strip -r %t -o %t2 -# RUN: llvm-readobj --sections %t2 | FileCheck --implicit-check-not=Name: %s +# RUN: llvm-readobj --macho-segment --sections %t2 | FileCheck --implicit-check-not=Name: %s # CHECK: Name: __text # CHECK-NEXT: Segment: __TEXT # CHECK: Name: __bundle # CHECK-NEXT: Segment: __DATA -# CHECK: Name: __notbundle -# CHECK-NEXT: Segment: __LLVM +# CHECK: Cmd: LC_SEGMENT_64 +# CHECK-NEXT: Name: __TEXT +# CHECK: Cmd: LC_SEGMENT_64 +# CHECK-NEXT: Name: __DATA --- !mach-o FileHeader: @@ -16,21 +18,21 @@ cputype: 0x01000007 cpusubtype: 0x00000003 filetype: 0x00000001 - ncmds: 1 - sizeofcmds: 392 + ncmds: 3 + sizeofcmds: 456 flags: 0x00002000 reserved: 0x00000000 LoadCommands: - cmd: LC_SEGMENT_64 - cmdsize: 392 - segname: '' + cmdsize: 152 + segname: __TEXT vmaddr: 0 - vmsize: 16 - fileoff: 424 - filesize: 16 + vmsize: 4 + fileoff: 488 + filesize: 4 maxprot: 7 initprot: 7 - nsects: 4 + nsects: 1 flags: 0 Sections: - sectname: __text @@ -38,7 +40,7 @@ addr: 0x0000000000000000 content: 'AABBCCDD' size: 4 - offset: 424 + offset: 488 align: 0 reloff: 0x00000000 nreloc: 0 @@ -46,12 +48,24 @@ reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __DATA + vmaddr: 4 + vmsize: 4 + fileoff: 492 + filesize: 4 + maxprot: 7 + initprot: 7 + nsects: 1 + flags: 0 + Sections: - sectname: __bundle segname: __DATA addr: 0x0000000000000004 content: 'DDAADDAA' size: 4 - 
offset: 428 + offset: 492 align: 0 reloff: 0x00000000 nreloc: 0 @@ -59,25 +73,24 @@ reserved1: 0x00000000 reserved2: 0x00000000 reserved3: 0x00000000 + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: __LLVM + vmaddr: 8 + vmsize: 4 + fileoff: 496 + filesize: 4 + maxprot: 7 + initprot: 7 + nsects: 1 + flags: 0 + Sections: - sectname: __bundle segname: __LLVM addr: 0x0000000000000008 content: 'EEFFEEFF' size: 4 - offset: 432 - align: 0 - reloff: 0x00000000 - nreloc: 0 - flags: 0x00000000 - reserved1: 0x00000000 - reserved2: 0x00000000 - reserved3: 0x00000000 - - sectname: __notbundle - segname: __LLVM - addr: 0x0000000000000008 - content: 'EEFFEEFF' - size: 4 - offset: 436 + offset: 496 align: 0 reloff: 0x00000000 nreloc: 0 diff --git a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp --- a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp +++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp @@ -14,6 +14,7 @@ #include "llvm/BinaryFormat/COFF.h" #include "llvm/ObjCopy/CommonConfig.h" #include "llvm/ObjCopy/ConfigManager.h" +#include "llvm/ObjCopy/MachO/MachOConfig.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Support/CRC.h" @@ -1189,6 +1190,7 @@ DriverConfig DC; ConfigManager ConfigMgr; CommonConfig &Config = ConfigMgr.Common; + MachOConfig &MachOConfig = ConfigMgr.MachO; BitcodeStripOptTable T; unsigned MissingArgumentIndex, MissingArgumentCount; opt::InputArgList InputArgs = @@ -1233,9 +1235,11 @@ if (!InputArgs.hasArg(BITCODE_STRIP_remove)) return createStringError(errc::invalid_argument, "no action specified"); - // We only support -r for now, which removes all bitcode sections. + // We only support -r for now, which removes all bitcode sections and + // the __LLVM segment if it's now empty. 
cantFail(Config.ToRemove.addMatcher(NameOrPattern::create( "__LLVM,__bundle", MatchStyle::Literal, ErrorCallback))); + MachOConfig.EmptySegmentsToRemove.insert("__LLVM"); DC.CopyConfigs.push_back(std::move(ConfigMgr)); return std::move(DC); diff --git a/llvm/unittests/IR/CMakeLists.txt b/llvm/unittests/IR/CMakeLists.txt --- a/llvm/unittests/IR/CMakeLists.txt +++ b/llvm/unittests/IR/CMakeLists.txt @@ -41,6 +41,7 @@ ValueHandleTest.cpp ValueMapTest.cpp ValueTest.cpp + VectorBuilderTest.cpp VectorTypesTest.cpp VerifierTest.cpp VPIntrinsicTest.cpp diff --git a/llvm/unittests/IR/VectorBuilderTest.cpp b/llvm/unittests/IR/VectorBuilderTest.cpp new file mode 100644 --- /dev/null +++ b/llvm/unittests/IR/VectorBuilderTest.cpp @@ -0,0 +1,280 @@ +//===--------- VectorBuilderTest.cpp - VectorBuilder unit tests -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/VectorBuilder.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +static unsigned VectorNumElements = 8; + +class VectorBuilderTest : public testing::Test { +protected: + LLVMContext Context; + + VectorBuilderTest() : Context() {} + + std::unique_ptr createBuilderModule(Function *&Func, BasicBlock *&BB, + Value *&Mask, Value *&EVL) { + auto Mod = std::make_unique("TestModule", Context); + auto *Int32Ty = Type::getInt32Ty(Context); + auto *Mask8Ty = + FixedVectorType::get(Type::getInt1Ty(Context), VectorNumElements); + auto *VoidFuncTy = + FunctionType::get(Type::getVoidTy(Context), {Mask8Ty, Int32Ty}, false); + Func = + Function::Create(VoidFuncTy, 
GlobalValue::ExternalLinkage, "bla", *Mod); + Mask = Func->getArg(0); + EVL = Func->getArg(1); + BB = BasicBlock::Create(Context, "entry", Func); + + return Mod; + } +}; + +/// Check that creating binary arithmetic VP intrinsics works. +TEST_F(VectorBuilderTest, TestCreateBinaryInstructions) { + Function *F; + BasicBlock *BB; + Value *Mask, *EVL; + auto Mod = createBuilderModule(F, BB, Mask, EVL); + + IRBuilder<> Builder(BB); + VectorBuilder VBuild(Builder); + VBuild.setMask(Mask).setEVL(EVL); + + auto *FloatVecTy = + FixedVectorType::get(Type::getFloatTy(Context), VectorNumElements); + auto *IntVecTy = + FixedVectorType::get(Type::getInt32Ty(Context), VectorNumElements); + +#define HANDLE_BINARY_INST(NUM, OPCODE, INSTCLASS) \ + { \ + auto VPID = VPIntrinsic::getForOpcode(Instruction::OPCODE); \ + bool IsFP = (#INSTCLASS)[0] == 'F'; \ + auto *ValueTy = IsFP ? FloatVecTy : IntVecTy; \ + Value *Op = UndefValue::get(ValueTy); \ + auto *I = VBuild.createVectorInstruction(Instruction::OPCODE, ValueTy, \ + {Op, Op}); \ + ASSERT_TRUE(isa(I)); \ + auto *VPIntrin = cast(I); \ + ASSERT_EQ(VPIntrin->getIntrinsicID(), VPID); \ + ASSERT_EQ(VPIntrin->getMaskParam(), Mask); \ + ASSERT_EQ(VPIntrin->getVectorLengthParam(), EVL); \ + } +#include "llvm/IR/Instruction.def" +} + +static bool isAllTrueMask(Value *Val, unsigned NumElements) { + auto *ConstMask = dyn_cast(Val); + if (!ConstMask) + return false; + + // Structure check. + if (!ConstMask->isAllOnesValue()) + return false; + + // Type check. + auto *MaskVecTy = cast(ConstMask->getType()); + if (MaskVecTy->getNumElements() != NumElements) + return false; + + return MaskVecTy->getElementType()->isIntegerTy(1); +} + +/// Check that creating binary arithmetic VP intrinsics works. 
+TEST_F(VectorBuilderTest, TestCreateBinaryInstructions_FixedVector_NoMask) { + Function *F; + BasicBlock *BB; + Value *Mask, *EVL; + auto Mod = createBuilderModule(F, BB, Mask, EVL); + + IRBuilder<> Builder(BB); + VectorBuilder VBuild(Builder); + VBuild.setEVL(EVL).setStaticVL(VectorNumElements); + + auto *FloatVecTy = + FixedVectorType::get(Type::getFloatTy(Context), VectorNumElements); + auto *IntVecTy = + FixedVectorType::get(Type::getInt32Ty(Context), VectorNumElements); + +#define HANDLE_BINARY_INST(NUM, OPCODE, INSTCLASS) \ + { \ + auto VPID = VPIntrinsic::getForOpcode(Instruction::OPCODE); \ + bool IsFP = (#INSTCLASS)[0] == 'F'; \ + Type *ValueTy = IsFP ? FloatVecTy : IntVecTy; \ + Value *Op = UndefValue::get(ValueTy); \ + auto *I = VBuild.createVectorInstruction(Instruction::OPCODE, ValueTy, \ + {Op, Op}); \ + ASSERT_TRUE(isa(I)); \ + auto *VPIntrin = cast(I); \ + ASSERT_EQ(VPIntrin->getIntrinsicID(), VPID); \ + ASSERT_TRUE(isAllTrueMask(VPIntrin->getMaskParam(), VectorNumElements)); \ + ASSERT_EQ(VPIntrin->getVectorLengthParam(), EVL); \ + } +#include "llvm/IR/Instruction.def" +} + +static bool isLegalConstEVL(Value *Val, unsigned ExpectedEVL) { + auto *ConstEVL = dyn_cast(Val); + if (!ConstEVL) + return false; + + // Value check. + if (ConstEVL->getZExtValue() != ExpectedEVL) + return false; + + // Type check. + return ConstEVL->getType()->isIntegerTy(32); +} + +/// Check that creating binary arithmetic VP intrinsics works. 
+TEST_F(VectorBuilderTest, TestCreateBinaryInstructions_FixedVector_NoEVL) { + Function *F; + BasicBlock *BB; + Value *Mask, *EVL; + auto Mod = createBuilderModule(F, BB, Mask, EVL); + + IRBuilder<> Builder(BB); + VectorBuilder VBuild(Builder); + VBuild.setMask(Mask).setStaticVL(VectorNumElements); + + auto *FloatVecTy = + FixedVectorType::get(Type::getFloatTy(Context), VectorNumElements); + auto *IntVecTy = + FixedVectorType::get(Type::getInt32Ty(Context), VectorNumElements); + +#define HANDLE_BINARY_INST(NUM, OPCODE, INSTCLASS) \ + { \ + auto VPID = VPIntrinsic::getForOpcode(Instruction::OPCODE); \ + bool IsFP = (#INSTCLASS)[0] == 'F'; \ + Type *ValueTy = IsFP ? FloatVecTy : IntVecTy; \ + Value *Op = UndefValue::get(ValueTy); \ + auto *I = VBuild.createVectorInstruction(Instruction::OPCODE, ValueTy, \ + {Op, Op}); \ + ASSERT_TRUE(isa(I)); \ + auto *VPIntrin = cast(I); \ + ASSERT_EQ(VPIntrin->getIntrinsicID(), VPID); \ + ASSERT_EQ(VPIntrin->getMaskParam(), Mask); \ + ASSERT_TRUE( \ + isLegalConstEVL(VPIntrin->getVectorLengthParam(), VectorNumElements)); \ + } +#include "llvm/IR/Instruction.def" +} + +/// Check that creating binary arithmetic VP intrinsics works. +TEST_F(VectorBuilderTest, + TestCreateBinaryInstructions_FixedVector_NoMask_NoEVL) { + Function *F; + BasicBlock *BB; + Value *Mask, *EVL; + auto Mod = createBuilderModule(F, BB, Mask, EVL); + + IRBuilder<> Builder(BB); + VectorBuilder VBuild(Builder); + VBuild.setStaticVL(VectorNumElements); + + auto *FloatVecTy = + FixedVectorType::get(Type::getFloatTy(Context), VectorNumElements); + auto *IntVecTy = + FixedVectorType::get(Type::getInt32Ty(Context), VectorNumElements); + +#define HANDLE_BINARY_INST(NUM, OPCODE, INSTCLASS) \ + { \ + auto VPID = VPIntrinsic::getForOpcode(Instruction::OPCODE); \ + bool IsFP = (#INSTCLASS)[0] == 'F'; \ + Type *ValueTy = IsFP ? 
FloatVecTy : IntVecTy; \ + Value *Op = UndefValue::get(ValueTy); \ + auto *I = VBuild.createVectorInstruction(Instruction::OPCODE, ValueTy, \ + {Op, Op}); \ + ASSERT_TRUE(isa(I)); \ + auto *VPIntrin = cast(I); \ + ASSERT_EQ(VPIntrin->getIntrinsicID(), VPID); \ + ASSERT_TRUE(isAllTrueMask(VPIntrin->getMaskParam(), VectorNumElements)); \ + ASSERT_TRUE( \ + isLegalConstEVL(VPIntrin->getVectorLengthParam(), VectorNumElements)); \ + } +#include "llvm/IR/Instruction.def" +} +/// Check that creating vp.load/vp.store works. +TEST_F(VectorBuilderTest, TestCreateLoadStore) { + Function *F; + BasicBlock *BB; + Value *Mask, *EVL; + auto Mod = createBuilderModule(F, BB, Mask, EVL); + + IRBuilder<> Builder(BB); + VectorBuilder VBuild(Builder); + VBuild.setMask(Mask).setEVL(EVL); + + auto *FloatVecTy = + FixedVectorType::get(Type::getFloatTy(Context), VectorNumElements); + auto *FloatVecPtrTy = FloatVecTy->getPointerTo(); + + Value *FloatVecPtr = UndefValue::get(FloatVecPtrTy); + Value *FloatVec = UndefValue::get(FloatVecTy); + + // vp.load + auto LoadVPID = VPIntrinsic::getForOpcode(Instruction::Load); + auto *LoadIntrin = VBuild.createVectorInstruction(Instruction::Load, + FloatVecTy, {FloatVecPtr}); + ASSERT_TRUE(isa(LoadIntrin)); + auto *VPLoad = cast(LoadIntrin); + ASSERT_EQ(VPLoad->getIntrinsicID(), LoadVPID); + ASSERT_EQ(VPLoad->getMemoryPointerParam(), FloatVecPtr); + + // vp.store + auto *VoidTy = Builder.getVoidTy(); + auto StoreVPID = VPIntrinsic::getForOpcode(Instruction::Store); + auto *StoreIntrin = VBuild.createVectorInstruction(Instruction::Store, VoidTy, + {FloatVec, FloatVecPtr}); + ASSERT_TRUE(isa(LoadIntrin)); + auto *VPStore = cast(StoreIntrin); + ASSERT_EQ(VPStore->getIntrinsicID(), StoreVPID); + ASSERT_EQ(VPStore->getMemoryPointerParam(), FloatVecPtr); + ASSERT_EQ(VPStore->getMemoryDataParam(), FloatVec); +} + +/// Check that the SilentlyReturnNone error handling mode works. 
+TEST_F(VectorBuilderTest, TestFail_SilentlyReturnNone) { + Function *F; + BasicBlock *BB; + Value *Mask, *EVL; + auto Mod = createBuilderModule(F, BB, Mask, EVL); + + IRBuilder<> Builder(BB); + auto *VoidTy = Builder.getVoidTy(); + VectorBuilder VBuild(Builder, VectorBuilder::Behavior::SilentlyReturnNone); + VBuild.setMask(Mask).setEVL(EVL); + auto *Val = VBuild.createVectorInstruction(Instruction::Br, VoidTy, {}); + ASSERT_EQ(Val, nullptr); +} + +/// Check that the ReportAndFail error handling mode aborts as advertised. +TEST_F(VectorBuilderTest, TestFail_ReportAndAbort) { + Function *F; + BasicBlock *BB; + Value *Mask, *EVL; + auto Mod = createBuilderModule(F, BB, Mask, EVL); + + IRBuilder<> Builder(BB); + auto *VoidTy = Builder.getVoidTy(); + VectorBuilder VBuild(Builder, VectorBuilder::Behavior::ReportAndAbort); + VBuild.setMask(Mask).setEVL(EVL); + ASSERT_DEATH({ VBuild.createVectorInstruction(Instruction::Br, VoidTy, {}); }, + "No VPIntrinsic for this opcode"); +} + +} // end anonymous namespace diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py --- a/llvm/utils/UpdateTestChecks/common.py +++ b/llvm/utils/UpdateTestChecks/common.py @@ -916,12 +916,12 @@ check_label_format, False, preserve_names, global_vars_seen_dict, is_filtered) -def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name): +def add_analyze_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, is_filtered): check_label_format = '{} %s-LABEL: \'%s%s\''.format(comment_marker) global_vars_seen_dict = {} add_checks(output_lines, comment_marker, prefix_list, func_dict, func_name, check_label_format, False, True, global_vars_seen_dict, - is_filtered = False) + is_filtered) def build_global_values_dictionary(glob_val_dict, raw_tool_output, prefixes): for nameless_value in nameless_values: diff --git a/llvm/utils/gn/secondary/clang/lib/Tooling/Syntax/Pseudo/BUILD.gn 
b/llvm/utils/gn/secondary/clang/lib/Tooling/Syntax/Pseudo/BUILD.gn --- a/llvm/utils/gn/secondary/clang/lib/Tooling/Syntax/Pseudo/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/lib/Tooling/Syntax/Pseudo/BUILD.gn @@ -7,13 +7,13 @@ "//llvm/lib/Support", ] sources = [ + "DirectiveMap.cpp", "Grammar.cpp", "GrammarBNF.cpp", "LRGraph.cpp", "LRTable.cpp", "LRTableBuild.cpp", "Lex.cpp", - "Preprocess.cpp", "Token.cpp", ] } diff --git a/llvm/utils/gn/secondary/clang/unittests/Tooling/Syntax/Pseudo/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/Tooling/Syntax/Pseudo/BUILD.gn --- a/llvm/utils/gn/secondary/clang/unittests/Tooling/Syntax/Pseudo/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/Tooling/Syntax/Pseudo/BUILD.gn @@ -11,9 +11,9 @@ "//llvm/lib/Testing/Support", ] sources = [ + "DirectiveMapTest.cpp", "GrammarTest.cpp", "LRTableTest.cpp", - "PreprocessTest.cpp", "TokenTest.cpp", ] } diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -121,6 +121,7 @@ "__algorithm/pop_heap.h", "__algorithm/prev_permutation.h", "__algorithm/push_heap.h", + "__algorithm/ranges_max_element.h", "__algorithm/ranges_min_element.h", "__algorithm/ranges_swap_ranges.h", "__algorithm/remove.h", @@ -596,6 +597,7 @@ "type_traits", "typeindex", "typeinfo", + "uchar.h", "unordered_map", "unordered_set", "utility", diff --git a/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn --- a/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/IR/BUILD.gn @@ -76,6 +76,7 @@ "User.cpp", "Value.cpp", "ValueSymbolTable.cpp", + "VectorBuilder.cpp", "Verifier.cpp", ] } diff --git a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn --- a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn +++ 
b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn @@ -44,6 +44,7 @@ "ValueHandleTest.cpp", "ValueMapTest.cpp", "ValueTest.cpp", + "VectorBuilderTest.cpp", "VectorTypesTest.cpp", "VerifierTest.cpp", ] diff --git a/llvm/utils/update_analyze_test_checks.py b/llvm/utils/update_analyze_test_checks.py --- a/llvm/utils/update_analyze_test_checks.py +++ b/llvm/utils/update_analyze_test_checks.py @@ -32,19 +32,12 @@ from __future__ import print_function import argparse -import glob -import itertools import os # Used to advertise this file's name ("autogenerated_note"). -import string -import subprocess import sys -import tempfile import re from UpdateTestChecks import common -ADVERT = '; NOTE: Assertions have been autogenerated by ' - def main(): from argparse import RawTextHelpFormatter parser = argparse.ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter) @@ -53,34 +46,26 @@ parser.add_argument( '--function', help='The function in the test file to update') parser.add_argument('tests', nargs='+') - args = common.parse_commandline_args(parser) + initial_args = common.parse_commandline_args(parser) script_name = os.path.basename(__file__) - autogenerated_note = (ADVERT + 'utils/' + script_name) - opt_basename = os.path.basename(args.opt_binary) + opt_basename = os.path.basename(initial_args.opt_binary) if (opt_basename != "opt"): common.error('Unexpected opt name: ' + opt_basename) sys.exit(1) - test_paths = [test for pattern in args.tests for test in glob.glob(pattern)] - for test in test_paths: - with open(test) as f: - input_lines = [l.rstrip() for l in f] - - first_line = input_lines[0] if input_lines else "" - if 'autogenerated' in first_line and script_name not in first_line: - common.warn("Skipping test which wasn't autogenerated by " + script_name + ": " + test) - continue - - if args.update_only: - if not first_line or 'autogenerated' not in first_line: - common.warn("Skipping test which isn't autogenerated: " + test) - continue + for ti in 
common.itertests(initial_args.tests, parser, + script_name='utils/' + script_name): + triple_in_ir = None + for l in ti.input_lines: + m = common.TRIPLE_IR_RE.match(l) + if m: + triple_in_ir = m.groups()[0] + break - run_lines = common.find_run_lines(test, input_lines) prefix_list = [] - for l in run_lines: + for l in ti.run_lines: if '|' not in l: common.warn('Skipping unparseable RUN line: ' + l) continue @@ -111,19 +96,19 @@ builder = common.FunctionTestBuilder( run_list = prefix_list, flags = type('', (object,), { - 'verbose': args.verbose, - 'filters': args.filters, + 'verbose': ti.args.verbose, + 'filters': ti.args.filters, 'function_signature': False, 'check_attributes': False, 'replace_value_regex': []}), scrubber_args = [], - path=test) + path=ti.path) for prefixes, opt_args in prefix_list: common.debug('Extracted opt cmd:', opt_basename, opt_args, file=sys.stderr) common.debug('Extracted FileCheck prefixes:', str(prefixes), file=sys.stderr) - raw_tool_outputs = common.invoke_tool(args.opt_binary, opt_args, test) + raw_tool_outputs = common.invoke_tool(ti.args.opt_binary, opt_args, ti.path) # Split analysis outputs by "Printing analysis " declarations. for raw_tool_output in re.split(r'Printing analysis ', raw_tool_outputs): @@ -136,9 +121,10 @@ prefix_set = set([prefix for prefixes, _ in prefix_list for prefix in prefixes]) common.debug('Rewriting FileCheck prefixes:', str(prefix_set), file=sys.stderr) output_lines = [] - output_lines.append(autogenerated_note) - for input_line in input_lines: + for input_info in ti.iterlines(output_lines): + input_line = input_info.line + args = input_info.args if is_in_function_start: if input_line == '': continue @@ -149,7 +135,8 @@ continue # Print out the various check lines here. 
- common.add_analyze_checks(output_lines, ';', prefix_list, func_dict, func_name) + common.add_analyze_checks(output_lines, ';', prefix_list, func_dict, func_name, + is_filtered=builder.is_filtered()) is_in_function_start = False if is_in_function: @@ -164,10 +151,6 @@ is_in_function = False continue - # Discard any previous script advertising. - if input_line.startswith(ADVERT): - continue - # If it's outside a function, it just gets copied to the output. output_lines.append(input_line) @@ -175,14 +158,14 @@ if not m: continue func_name = m.group(1) - if args.function is not None and func_name != args.function: + if ti.args.function is not None and func_name != ti.args.function: # When filtering on a specific function, skip all others. continue is_in_function = is_in_function_start = True - common.debug('Writing %d lines to %s...' % (len(output_lines), test)) + common.debug('Writing %d lines to %s...' % (len(output_lines), ti.path)) - with open(test, 'wb') as f: + with open(ti.path, 'wb') as f: f.writelines(['{}\n'.format(l).encode('utf-8') for l in output_lines]) diff --git a/mlir/docs/Diagnostics.md b/mlir/docs/Diagnostics.md --- a/mlir/docs/Diagnostics.md +++ b/mlir/docs/Diagnostics.md @@ -107,6 +107,18 @@ "Compose an interesting error: @foo, i32, (0, 1, 2)" ``` +Operations attached to a diagnostic will be printed in generic form if the +severity level is `Error`, otherwise custom operation printers will be used. +```c++ +// `anotherOp` will be printed in generic form, +// e.g. %3 = "arith.addf"(%arg4, %2) : (f32, f32) -> f32 +op->emitError() << anotherOp; + +// `anotherOp` will be printed using the custom printer, +// e.g. %3 = arith.addf %arg4, %2 : f32 +op->emitRemark() << anotherOp; +``` + ### Attaching notes Unlike many other compiler frameworks, notes in MLIR cannot be emitted directly. diff --git a/mlir/docs/OpDefinitions.md b/mlir/docs/OpDefinitions.md --- a/mlir/docs/OpDefinitions.md +++ b/mlir/docs/OpDefinitions.md @@ -601,6 +601,15 @@ verified. 
Verifiers further down the order can rely on certain invariants being verified by a previous verifier and do not need to re-verify them. +#### Emitting diagnostics in custom verifiers + +Custom verifiers should avoid printing operations using custom operation +printers, because they require the printed operation (and sometimes its parent +operation) to be verified first. In particular, when emitting diagnostics, +custom verifiers should use the `Error` severity level, which prints operations +in generic form by default, and avoid using lower severity levels (`Note`, +`Remark`, `Warning`). + ### Declarative Assembly Format The custom assembly form of the operation may be specified in a declarative diff --git a/mlir/examples/standalone/standalone-opt/standalone-opt.cpp b/mlir/examples/standalone/standalone-opt/standalone-opt.cpp --- a/mlir/examples/standalone/standalone-opt/standalone-opt.cpp +++ b/mlir/examples/standalone/standalone-opt/standalone-opt.cpp @@ -14,7 +14,7 @@ #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Support/FileUtilities.h" -#include "mlir/Support/MlirOptMain.h" +#include "mlir/Tools/mlir-opt/MlirOptMain.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/SourceMgr.h" diff --git a/mlir/examples/standalone/standalone-translate/CMakeLists.txt b/mlir/examples/standalone/standalone-translate/CMakeLists.txt --- a/mlir/examples/standalone/standalone-translate/CMakeLists.txt +++ b/mlir/examples/standalone/standalone-translate/CMakeLists.txt @@ -17,7 +17,7 @@ MLIRParser MLIRPass MLIRSPIRV - MLIRTranslation + MLIRTranslateLib MLIRSupport ) diff --git a/mlir/examples/standalone/standalone-translate/standalone-translate.cpp b/mlir/examples/standalone/standalone-translate/standalone-translate.cpp --- a/mlir/examples/standalone/standalone-translate/standalone-translate.cpp +++ b/mlir/examples/standalone/standalone-translate/standalone-translate.cpp @@ -13,7 +13,7 @@ #include 
"mlir/InitAllTranslations.h" #include "mlir/Support/LogicalResult.h" -#include "mlir/Translation.h" +#include "mlir/Tools/mlir-translate/MlirTranslateMain.h" #include "Standalone/StandaloneDialect.h" diff --git a/mlir/examples/toy/Ch2/toyc.cpp b/mlir/examples/toy/Ch2/toyc.cpp --- a/mlir/examples/toy/Ch2/toyc.cpp +++ b/mlir/examples/toy/Ch2/toyc.cpp @@ -19,7 +19,7 @@ #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Verifier.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/CommandLine.h" @@ -98,7 +98,7 @@ llvm::SourceMgr sourceMgr; sourceMgr.AddNewSourceBuffer(std::move(*fileOrErr), llvm::SMLoc()); mlir::OwningOpRef module = - mlir::parseSourceFile(sourceMgr, &context); + mlir::parseSourceFile(sourceMgr, &context); if (!module) { llvm::errs() << "Error can't load file " << inputFilename << "\n"; return 3; diff --git a/mlir/examples/toy/Ch3/toyc.cpp b/mlir/examples/toy/Ch3/toyc.cpp --- a/mlir/examples/toy/Ch3/toyc.cpp +++ b/mlir/examples/toy/Ch3/toyc.cpp @@ -18,7 +18,7 @@ #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Verifier.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Transforms/Passes.h" @@ -93,7 +93,7 @@ // Parse the input mlir. 
sourceMgr.AddNewSourceBuffer(std::move(*fileOrErr), llvm::SMLoc()); - module = mlir::parseSourceFile(sourceMgr, &context); + module = mlir::parseSourceFile(sourceMgr, &context); if (!module) { llvm::errs() << "Error can't load file " << inputFilename << "\n"; return 3; diff --git a/mlir/examples/toy/Ch4/toyc.cpp b/mlir/examples/toy/Ch4/toyc.cpp --- a/mlir/examples/toy/Ch4/toyc.cpp +++ b/mlir/examples/toy/Ch4/toyc.cpp @@ -19,7 +19,7 @@ #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Verifier.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Transforms/Passes.h" @@ -94,7 +94,7 @@ // Parse the input mlir. sourceMgr.AddNewSourceBuffer(std::move(*fileOrErr), llvm::SMLoc()); - module = mlir::parseSourceFile(sourceMgr, &context); + module = mlir::parseSourceFile(sourceMgr, &context); if (!module) { llvm::errs() << "Error can't load file " << inputFilename << "\n"; return 3; diff --git a/mlir/examples/toy/Ch5/toyc.cpp b/mlir/examples/toy/Ch5/toyc.cpp --- a/mlir/examples/toy/Ch5/toyc.cpp +++ b/mlir/examples/toy/Ch5/toyc.cpp @@ -21,7 +21,7 @@ #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Verifier.h" #include "mlir/InitAllDialects.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Transforms/Passes.h" @@ -98,7 +98,7 @@ // Parse the input mlir. 
sourceMgr.AddNewSourceBuffer(std::move(*fileOrErr), llvm::SMLoc()); - module = mlir::parseSourceFile(sourceMgr, &context); + module = mlir::parseSourceFile(sourceMgr, &context); if (!module) { llvm::errs() << "Error can't load file " << inputFilename << "\n"; return 3; diff --git a/mlir/examples/toy/Ch6/toyc.cpp b/mlir/examples/toy/Ch6/toyc.cpp --- a/mlir/examples/toy/Ch6/toyc.cpp +++ b/mlir/examples/toy/Ch6/toyc.cpp @@ -23,7 +23,7 @@ #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Verifier.h" #include "mlir/InitAllDialects.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" @@ -119,7 +119,7 @@ // Parse the input mlir. llvm::SourceMgr sourceMgr; sourceMgr.AddNewSourceBuffer(std::move(*fileOrErr), llvm::SMLoc()); - module = mlir::parseSourceFile(sourceMgr, &context); + module = mlir::parseSourceFile(sourceMgr, &context); if (!module) { llvm::errs() << "Error can't load file " << inputFilename << "\n"; return 3; diff --git a/mlir/examples/toy/Ch7/toyc.cpp b/mlir/examples/toy/Ch7/toyc.cpp --- a/mlir/examples/toy/Ch7/toyc.cpp +++ b/mlir/examples/toy/Ch7/toyc.cpp @@ -23,7 +23,7 @@ #include "mlir/IR/MLIRContext.h" #include "mlir/IR/Verifier.h" #include "mlir/InitAllDialects.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" @@ -119,7 +119,7 @@ // Parse the input mlir. 
llvm::SourceMgr sourceMgr; sourceMgr.AddNewSourceBuffer(std::move(*fileOrErr), llvm::SMLoc()); - module = mlir::parseSourceFile(sourceMgr, &context); + module = mlir::parseSourceFile(sourceMgr, &context); if (!module) { llvm::errs() << "Error can't load file " << inputFilename << "\n"; return 3; diff --git a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h --- a/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h +++ b/mlir/include/mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h @@ -239,7 +239,7 @@ /// Add a bufferization state initializer that initializes the specified /// dialect-specific bufferization state. - void addDialectStateInitializer(StringRef name, DialectStateInitFn fn); + void addDialectStateInitializer(StringRef name, const DialectStateInitFn &fn); private: /// Allow a dialect. diff --git a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h --- a/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/Linalg/Utils/Utils.h @@ -397,7 +397,7 @@ FailureOr tileConsumerAndFuseProducers( OpBuilder &b, LinalgOp consumerOp, ArrayRef tileSizes, ArrayRef tileInterchange, - Optional tileDistribution); + const Optional &tileDistribution); //===----------------------------------------------------------------------===// // Generic op region utilities diff --git a/mlir/include/mlir/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.h b/mlir/include/mlir/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.h @@ -0,0 +1,20 @@ +//===- BufferizableOpInterfaceImpl.h - Impl. of BufferizableOpInterface ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_DIALECT_SHAPE_BUFFERIZABLEOPINTERFACEIMPL_H +#define MLIR_DIALECT_SHAPE_BUFFERIZABLEOPINTERFACEIMPL_H + +namespace mlir { +class DialectRegistry; + +namespace shape { +void registerBufferizableOpInterfaceExternalModels(DialectRegistry ®istry); +} // namespace shape +} // namespace mlir + +#endif // MLIR_DIALECT_SHAPE_BUFFERIZABLEOPINTERFACEIMPL_H diff --git a/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h b/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h --- a/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h +++ b/mlir/include/mlir/Dialect/Shape/Transforms/Passes.h @@ -40,21 +40,6 @@ void populateRemoveShapeConstraintsPatterns(RewritePatternSet &patterns); std::unique_ptr> createRemoveShapeConstraintsPass(); -/// Populates patterns for shape dialect structural type conversions and sets up -/// the provided ConversionTarget with the appropriate legality configuration -/// for the ops to get converted properly. -/// -/// A "structural" type conversion is one where the underlying ops are -/// completely agnostic to the actual types involved and simply need to update -/// their types consistently. An example of this is shape.assuming -- the -/// shape.assuming op and the corresponding shape.assuming_yield op need to have -/// consistent types, but the exact types don't matter. So all that we need to -/// do for a structural type conversion is to update both of their types -/// consistently to the new types prescribed by the TypeConverter. -void populateShapeStructuralTypeConversionsAndLegality( - TypeConverter &typeConverter, RewritePatternSet &patterns, - ConversionTarget &target); - // Bufferizes shape dialect ops. 
// // Note that most shape dialect ops must be converted to std before diff --git a/mlir/include/mlir/IR/OperationSupport.h b/mlir/include/mlir/IR/OperationSupport.h --- a/mlir/include/mlir/IR/OperationSupport.h +++ b/mlir/include/mlir/IR/OperationSupport.h @@ -726,6 +726,9 @@ /// Always print operations in the generic form. OpPrintingFlags &printGenericOpForm(); + /// Do not verify the operation when using custom operation printers. + OpPrintingFlags &assumeVerified(); + /// Use local scope when printing the operation. This allows for using the /// printer in a more localized and thread-safe setting, but may not /// necessarily be identical to what the IR will look like when dumping @@ -747,6 +750,9 @@ /// Return if operations should be printed in the generic form. bool shouldPrintGenericOpForm() const; + /// Return if operation verification should be skipped. + bool shouldAssumeVerified() const; + /// Return if the printer should use local scope when dumping the IR. bool shouldUseLocalScope() const; @@ -762,6 +768,9 @@ /// Print operations in the generic form. bool printGenericOpFormFlag : 1; + /// Skip operation verification. + bool assumeVerifiedFlag : 1; + /// Print operations with numberings local to the current operation. 
bool printLocalScope : 1; }; diff --git a/mlir/include/mlir/IR/Value.h b/mlir/include/mlir/IR/Value.h --- a/mlir/include/mlir/IR/Value.h +++ b/mlir/include/mlir/IR/Value.h @@ -24,6 +24,7 @@ class BlockArgument; class Operation; class OpOperand; +class OpPrintingFlags; class OpResult; class Region; class Value; @@ -215,6 +216,7 @@ // Utilities void print(raw_ostream &os); + void print(raw_ostream &os, const OpPrintingFlags &flags); void print(raw_ostream &os, AsmState &state); void dump(); diff --git a/mlir/include/mlir/Parser.h b/mlir/include/mlir/Parser/Parser.h rename from mlir/include/mlir/Parser.h rename to mlir/include/mlir/Parser/Parser.h --- a/mlir/include/mlir/Parser.h +++ b/mlir/include/mlir/Parser/Parser.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef MLIR_PARSER_H -#define MLIR_PARSER_H +#ifndef MLIR_PARSER_PARSER_H +#define MLIR_PARSER_PARSER_H #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinOps.h" @@ -206,21 +206,21 @@ /// TODO: These methods are deprecated in favor of the above template versions. /// They should be removed when usages have been updated. 
-inline OwningOpRef parseSourceFile(const llvm::SourceMgr &sourceMgr, - MLIRContext *context) { +[[deprecated("use parseSourceFile")]] inline OwningOpRef +parseSourceFile(const llvm::SourceMgr &sourceMgr, MLIRContext *context) { return parseSourceFile(sourceMgr, context); } -inline OwningOpRef parseSourceFile(llvm::StringRef filename, - MLIRContext *context) { +[[deprecated("use parseSourceFile")]] inline OwningOpRef +parseSourceFile(llvm::StringRef filename, MLIRContext *context) { return parseSourceFile(filename, context); } -inline OwningOpRef parseSourceFile(llvm::StringRef filename, - llvm::SourceMgr &sourceMgr, - MLIRContext *context) { +[[deprecated("use parseSourceFile")]] inline OwningOpRef +parseSourceFile(llvm::StringRef filename, llvm::SourceMgr &sourceMgr, + MLIRContext *context) { return parseSourceFile(filename, sourceMgr, context); } -inline OwningOpRef parseSourceString(llvm::StringRef moduleStr, - MLIRContext *context) { +[[deprecated("use parseSourceFile")]] inline OwningOpRef +parseSourceString(llvm::StringRef moduleStr, MLIRContext *context) { return parseSourceString(moduleStr, context); } @@ -268,4 +268,4 @@ } // namespace mlir -#endif // MLIR_PARSER_H +#endif // MLIR_PARSER_PARSER_H diff --git a/mlir/include/mlir/Support/DebugAction.h b/mlir/include/mlir/Support/DebugAction.h --- a/mlir/include/mlir/Support/DebugAction.h +++ b/mlir/include/mlir/Support/DebugAction.h @@ -194,7 +194,8 @@ class Handler : public DebugActionManager::HandlerBase { public: Handler() - : HandlerBase(TypeID::get::Handler>()) {} + : HandlerBase( + TypeID::get::Handler>()) {} /// This hook allows for controlling whether an action should execute or /// not. `parameters` correspond to the set of values provided by the @@ -207,7 +208,7 @@ /// Provide classof to allow casting between handler types. 
static bool classof(const DebugActionManager::HandlerBase *handler) { return handler->getHandlerID() == - TypeID::get::Handler>(); + TypeID::get::Handler>(); } }; diff --git a/mlir/include/mlir/Support/MlirOptMain.h b/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h rename from mlir/include/mlir/Support/MlirOptMain.h rename to mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h --- a/mlir/include/mlir/Support/MlirOptMain.h +++ b/mlir/include/mlir/Tools/mlir-opt/MlirOptMain.h @@ -10,8 +10,8 @@ // //===----------------------------------------------------------------------===// -#ifndef MLIR_SUPPORT_MLIROPTMAIN_H -#define MLIR_SUPPORT_MLIROPTMAIN_H +#ifndef MLIR_TOOLS_MLIROPT_MLIROPTMAIN_H +#define MLIR_TOOLS_MLIROPT_MLIROPTMAIN_H #include "mlir/Support/LogicalResult.h" #include "llvm/ADT/StringRef.h" @@ -95,4 +95,4 @@ } // namespace mlir -#endif // MLIR_SUPPORT_MLIROPTMAIN_H +#endif // MLIR_TOOLS_MLIROPT_MLIROPTMAIN_H diff --git a/mlir/include/mlir/Tools/mlir-translate/MlirTranslateMain.h b/mlir/include/mlir/Tools/mlir-translate/MlirTranslateMain.h new file mode 100644 --- /dev/null +++ b/mlir/include/mlir/Tools/mlir-translate/MlirTranslateMain.h @@ -0,0 +1,28 @@ +//===- MlirTranslateMain.h - MLIR Translation Driver main -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Main entry function for mlir-translate for when built as standalone binary. 
+// +//===----------------------------------------------------------------------===// + +#ifndef MLIR_TOOLS_MLIRTRANSLATE_MLIRTRANSLATEMAIN_H +#define MLIR_TOOLS_MLIRTRANSLATE_MLIRTRANSLATEMAIN_H + +#include "mlir/Support/LogicalResult.h" +#include "llvm/ADT/StringRef.h" + +namespace mlir { +/// Translate to/from an MLIR module from/to an external representation (e.g. +/// LLVM IR, SPIRV binary, ...). This is the entry point for the implementation +/// of tools like `mlir-translate`. The translation to perform is parsed from +/// the command line. The `toolName` argument is used for the header displayed +/// by `--help`. +LogicalResult mlirTranslateMain(int argc, char **argv, StringRef toolName); +} // namespace mlir + +#endif // MLIR_TOOLS_MLIRTRANSLATE_MLIRTRANSLATEMAIN_H diff --git a/mlir/include/mlir/Translation.h b/mlir/include/mlir/Tools/mlir-translate/Translation.h rename from mlir/include/mlir/Translation.h rename to mlir/include/mlir/Tools/mlir-translate/Translation.h --- a/mlir/include/mlir/Translation.h +++ b/mlir/include/mlir/Tools/mlir-translate/Translation.h @@ -9,8 +9,9 @@ // Registry for user-provided translations. // //===----------------------------------------------------------------------===// -#ifndef MLIR_TRANSLATION_H -#define MLIR_TRANSLATION_H + +#ifndef MLIR_TOOLS_MLIRTRANSLATE_TRANSLATION_H +#define MLIR_TOOLS_MLIRTRANSLATE_TRANSLATION_H #include "llvm/Support/CommandLine.h" @@ -96,14 +97,6 @@ size_t globalWidth) const override; }; -/// Translate to/from an MLIR module from/to an external representation (e.g. -/// LLVM IR, SPIRV binary, ...). This is the entry point for the implementation -/// of tools like `mlir-translate`. The translation to perform is parsed from -/// the command line. The `toolName` argument is used for the header displayed -/// by `--help`. 
-LogicalResult mlirTranslateMain(int argc, char **argv, - llvm::StringRef toolName); - } // namespace mlir -#endif // MLIR_TRANSLATION_H +#endif // MLIR_TOOLS_MLIRTRANSLATE_TRANSLATION_H diff --git a/mlir/lib/Analysis/Presburger/PresburgerSpace.cpp b/mlir/lib/Analysis/Presburger/PresburgerSpace.cpp --- a/mlir/lib/Analysis/Presburger/PresburgerSpace.cpp +++ b/mlir/lib/Analysis/Presburger/PresburgerSpace.cpp @@ -188,7 +188,7 @@ os << "Dimension: " << getNumDomainIds() << ", "; } os << "Symbols: " << getNumSymbolIds() << ", " - << "Locals" << getNumLocalIds() << "\n"; + << "Locals: " << getNumLocalIds() << "\n"; } void PresburgerLocalSpace::dump() const { print(llvm::errs()); } diff --git a/mlir/lib/CAPI/IR/IR.cpp b/mlir/lib/CAPI/IR/IR.cpp --- a/mlir/lib/CAPI/IR/IR.cpp +++ b/mlir/lib/CAPI/IR/IR.cpp @@ -20,7 +20,7 @@ #include "mlir/IR/Types.h" #include "mlir/IR/Verifier.h" #include "mlir/Interfaces/InferTypeOpInterface.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include "llvm/Support/Debug.h" #include diff --git a/mlir/lib/CMakeLists.txt b/mlir/lib/CMakeLists.txt --- a/mlir/lib/CMakeLists.txt +++ b/mlir/lib/CMakeLists.txt @@ -15,7 +15,6 @@ add_subdirectory(Target) add_subdirectory(Tools) add_subdirectory(Transforms) -add_subdirectory(Translation) # Only enable the ExecutionEngine if the native target is configured in. 
if(TARGET ${LLVM_NATIVE_ARCH}) diff --git a/mlir/lib/Conversion/GPUToVulkan/CMakeLists.txt b/mlir/lib/Conversion/GPUToVulkan/CMakeLists.txt --- a/mlir/lib/Conversion/GPUToVulkan/CMakeLists.txt +++ b/mlir/lib/Conversion/GPUToVulkan/CMakeLists.txt @@ -15,5 +15,5 @@ MLIRSPIRVSerialization MLIRSupport MLIRTransforms - MLIRTranslation + MLIRTranslateLib ) diff --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp --- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp @@ -618,6 +618,7 @@ AffineMap::get(origLbMap.getNumDims() + origUbMap.getNumDims(), origLbMap.getNumSymbols() + origUbMap.getNumSymbols(), newUbExprs, opBuilder.getContext()); + canonicalizeMapAndOperands(&newUbMap, &ubOperands); // Normalize the loop. op.setUpperBound(ubOperands, newUbMap); @@ -640,6 +641,7 @@ AffineExpr newIVExpr = origIVExpr * origLoopStep + origLbMap.getResult(0); AffineMap ivMap = AffineMap::get(origLbMap.getNumDims() + 1, origLbMap.getNumSymbols(), newIVExpr); + canonicalizeMapAndOperands(&ivMap, &lbOperands); Operation *newIV = opBuilder.create(loc, ivMap, lbOperands); op.getInductionVar().replaceAllUsesExcept(newIV->getResult(0), newIV); return success(); diff --git a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp --- a/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp +++ b/mlir/lib/Dialect/Arithmetic/IR/ArithmeticOps.cpp @@ -1862,8 +1862,8 @@ OpFoldResult arith::ShLIOp::fold(ArrayRef operands) { // Don't fold if shifting more than the bit width. 
bool bounded = false; - auto result = - constFoldBinaryOp(operands, [&](APInt a, const APInt &b) { + auto result = constFoldBinaryOp( + operands, [&](const APInt &a, const APInt &b) { bounded = b.ule(b.getBitWidth()); return std::move(a).shl(b); }); diff --git a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp --- a/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp +++ b/mlir/lib/Dialect/Bufferization/IR/BufferizableOpInterface.cpp @@ -64,8 +64,8 @@ return nullptr; } -void BufferizationOptions::addDialectStateInitializer(StringRef name, - DialectStateInitFn fn) { +void BufferizationOptions::addDialectStateInitializer( + StringRef name, const DialectStateInitFn &fn) { stateInitializers.push_back( [=](BufferizationState &state) { state.insertDialectState(name, fn()); }); } diff --git a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp --- a/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp +++ b/mlir/lib/Dialect/GPU/Transforms/KernelOutlining.cpp @@ -22,7 +22,7 @@ #include "mlir/IR/Builders.h" #include "mlir/IR/Matchers.h" #include "mlir/IR/SymbolTable.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include "mlir/Support/LLVM.h" #include "mlir/Transforms/RegionUtils.h" diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp --- a/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgDialect.cpp @@ -16,7 +16,7 @@ #include "mlir/IR/Dialect.h" #include "mlir/IR/DialectImplementation.h" #include "mlir/IR/FunctionInterfaces.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include "mlir/Support/LLVM.h" #include "mlir/Transforms/InliningUtils.h" diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ 
b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -22,7 +22,7 @@ #include "mlir/IR/OpImplementation.h" #include "mlir/IR/PatternMatch.h" #include "mlir/Interfaces/InferTypeOpInterface.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" @@ -873,7 +873,7 @@ // Get the `sourceShape` of the `sourceType`. If the operand is a result of // `tensor.cast` operation and source of the cast operation has a static // shape, then assign it to the `sourceShape`. - auto parentOp = src.getDefiningOp(); + auto *parentOp = src.getDefiningOp(); ArrayRef sourceShape = sourceType.getShape(); if (parentOp) { if (auto castOp = dyn_cast(parentOp)) { diff --git a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/ComprehensiveBufferizePass.cpp @@ -40,7 +40,7 @@ const LinalgComprehensiveModuleBufferize &p) = default; explicit LinalgComprehensiveModuleBufferize( - AnalysisBufferizationOptions options) + const AnalysisBufferizationOptions &options) : options(options) {} void runOnOperation() override; diff --git a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/FusionOnTensors.cpp @@ -415,7 +415,7 @@ FailureOr mlir::linalg::tileConsumerAndFuseProducers( OpBuilder &b, LinalgOp consumerOp, ArrayRef tileSizes, ArrayRef tileInterchange, - Optional tileDistribution) { + const Optional &tileDistribution) { assert(tileSizes.size() == tileInterchange.size() && "expect the number of tile sizes and interchange dims to match"); assert(isPermutation(tileInterchange) && diff --git a/mlir/lib/Dialect/Linalg/Transforms/SparseTensorRewriting.cpp 
b/mlir/lib/Dialect/Linalg/Transforms/SparseTensorRewriting.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/SparseTensorRewriting.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/SparseTensorRewriting.cpp @@ -35,8 +35,8 @@ if (auto enc = getSparseTensorEncoding(op->get().getType())) { ArrayRef dimTypes = enc.getDimLevelType(); - for (unsigned i = 0, e = dimTypes.size(); i < e; i++) - if (dimTypes[i] == SparseTensorEncodingAttr::DimLevelType::Compressed) + for (auto dimType : dimTypes) + if (dimType == SparseTensorEncodingAttr::DimLevelType::Compressed) return true; // at least one compressed } return false; @@ -52,7 +52,7 @@ // Helper to detect sampling operation. static bool isSampling(GenericOp op) { auto yieldOp = cast(op.region().front().getTerminator()); - if (auto def = yieldOp.getOperand(0).getDefiningOp()) { + if (auto *def = yieldOp.getOperand(0).getDefiningOp()) { if (isa(def) || isa(def)) { // Both scalar input arguments used exactly once. Value s1 = op.getBlock()->getArgument(0); @@ -68,7 +68,7 @@ static bool isMulChain(Value val, Value x) { if (auto arg = val.dyn_cast()) return arg != x; - if (auto def = val.getDefiningOp()) { + if (auto *def = val.getDefiningOp()) { if (isa(def) || isa(def)) return isMulChain(def->getOperand(0), x) && isMulChain(def->getOperand(1), x); @@ -79,7 +79,7 @@ // Helper to detect x = x + . static bool isSumOfMul(GenericOp op) { auto yieldOp = cast(op.region().front().getTerminator()); - if (auto def = yieldOp.getOperand(0).getDefiningOp()) { + if (auto *def = yieldOp.getOperand(0).getDefiningOp()) { if (isa(def) || isa(def)) { Value x = op.getBlock()->getArguments().back(); return (def->getOperand(0) == x && isMulChain(def->getOperand(1), x)) || @@ -165,8 +165,8 @@ addArg(mapper, fusedBlock, consBlock.getArgument(1 - other)); addArg(mapper, fusedBlock, prodBlock.getArgument(num - 1)); // Clone bodies of the producer and consumer in new evaluation order. 
- auto acc = prodBlock.getTerminator()->getOperand(0).getDefiningOp(); - auto sampler = consBlock.getTerminator()->getOperand(0).getDefiningOp(); + auto *acc = prodBlock.getTerminator()->getOperand(0).getDefiningOp(); + auto *sampler = consBlock.getTerminator()->getOperand(0).getDefiningOp(); rewriter.setInsertionPointToStart(fusedBlock); Value last; for (auto &op : prodBlock.without_terminator()) diff --git a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp --- a/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Vectorization.cpp @@ -1355,8 +1355,8 @@ struct Conv1DNwcGenerator : public StructuredGenerator { Conv1DNwcGenerator(OpBuilder &builder, LinalgOp linalgOp, int strideW, int dilationW) - : StructuredGenerator(builder, linalgOp), valid(false), - strideW(strideW), dilationW(dilationW) { + : StructuredGenerator(builder, linalgOp), strideW(strideW), + dilationW(dilationW) { // Determine whether `linalgOp` can be generated with this generator if (linalgOp.getNumInputs() != 2 || linalgOp.getNumOutputs() != 1) return; @@ -1665,7 +1665,7 @@ } private: - bool valid; + bool valid = false; int strideW, dilationW; Value lhsShaped, rhsShaped, resShaped; ShapedType lhsShapedType, rhsShapedType, resShapedType; diff --git a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp --- a/mlir/lib/Dialect/Linalg/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Linalg/Utils/Utils.cpp @@ -57,7 +57,7 @@ // `d0 + 2 * d1 + d3` is tiled by [0, 0, 0, 2] but not by [0, 0, 2, 0] // struct TileCheck : public AffineExprVisitor { - TileCheck(ValueRange tileSizes) : isTiled(false), tileSizes(tileSizes) {} + TileCheck(ValueRange tileSizes) : tileSizes(tileSizes) {} void visitDimExpr(AffineDimExpr expr) { isTiled |= !isZero(tileSizes[expr.getPosition()]); @@ -69,7 +69,7 @@ assert(expr.getRHS().cast().getValue() > 0 && "nonpositive multiplying coefficient"); } - bool isTiled; 
+ bool isTiled = false; ValueRange tileSizes; }; diff --git a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp --- a/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp +++ b/mlir/lib/Dialect/MemRef/IR/MemRefOps.cpp @@ -400,11 +400,11 @@ return WalkResult::advance(); }); - if (!toHoist.size()) + if (toHoist.empty()) return failure(); rewriter.setInsertionPoint(lastParentWithoutScope); - for (auto op : toHoist) { - auto cloned = rewriter.clone(*op); + for (auto *op : toHoist) { + auto *cloned = rewriter.clone(*op); rewriter.replaceOp(op, cloned->getResults()); } return success(); diff --git a/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp b/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp --- a/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp +++ b/mlir/lib/Dialect/SPIRV/IR/SPIRVDialect.cpp @@ -19,7 +19,7 @@ #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/DialectImplementation.h" #include "mlir/IR/MLIRContext.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include "mlir/Transforms/InliningUtils.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/Sequence.h" diff --git a/mlir/lib/Dialect/Shape/IR/Shape.cpp b/mlir/lib/Dialect/Shape/IR/Shape.cpp --- a/mlir/lib/Dialect/Shape/IR/Shape.cpp +++ b/mlir/lib/Dialect/Shape/IR/Shape.cpp @@ -469,8 +469,8 @@ SmallVector operands; for (Value operand : op.getInputs()) { - if (auto assume_all = operand.getDefiningOp()) - operands.append(assume_all.operand_begin(), assume_all->operand_end()); + if (auto assumeAll = operand.getDefiningOp()) + operands.append(assumeAll.operand_begin(), assumeAll->operand_end()); else operands.push_back(operand); } @@ -530,8 +530,8 @@ // Collect shapes checked by `cstr_broadcastable` operands. 
SmallVector>> shapes; for (auto cstr : operands) { - DenseSet shapes_set(cstr->operand_begin(), cstr->operand_end()); - shapes.emplace_back(cstr, std::move(shapes_set)); + DenseSet shapesSet(cstr->operand_begin(), cstr->operand_end()); + shapes.emplace_back(cstr, std::move(shapesSet)); } // Sort by the number of shape operands (larger to smaller). @@ -543,7 +543,7 @@ // shape operands, and remove redundant `cst_broadcastable` operations. We // do this until we find a set of `cst_broadcastable` operations with // non-overlapping constraints. - SmallVector marked_for_erase; + SmallVector markedForErase; for (unsigned i = 0; i < shapes.size(); ++i) { auto isSubset = [&](auto pair) { @@ -553,24 +553,24 @@ // Keep redundant `cstr_broadcastable` operations to be erased. auto *it = std::remove_if(shapes.begin() + i + 1, shapes.end(), isSubset); for (auto *it0 = it; it0 < shapes.end(); ++it0) - marked_for_erase.push_back(it0->first); + markedForErase.push_back(it0->first); shapes.erase(it, shapes.end()); } // We didn't find any operands that could be removed. - if (marked_for_erase.empty()) + if (markedForErase.empty()) return failure(); // Collect non-overlapping `cst_broadcastable` constraints. - SmallVector unique_constraints; + SmallVector uniqueConstraints; for (auto &shape : shapes) - unique_constraints.push_back(shape.first.getResult()); + uniqueConstraints.push_back(shape.first.getResult()); // Replace with a new `assuming_all` operation ... - rewriter.replaceOpWithNewOp(op, unique_constraints); + rewriter.replaceOpWithNewOp(op, uniqueConstraints); // ... and maybe erase `cstr_broadcastable` ops without uses. 
- for (auto &op : marked_for_erase) + for (auto &op : markedForErase) if (op->use_empty()) rewriter.eraseOp(op); diff --git a/mlir/lib/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.cpp @@ -0,0 +1,169 @@ +//===- BufferizableOpInterfaceImpl.cpp - Impl. of BufferizableOpInterface -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.h" + +#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" +#include "mlir/Dialect/Bufferization/IR/Bufferization.h" +#include "mlir/Dialect/Shape/IR/Shape.h" +#include "mlir/IR/Dialect.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/PatternMatch.h" + +using namespace mlir; +using namespace mlir::bufferization; +using namespace mlir::shape; + +namespace mlir { +namespace shape { +namespace { + +/// Bufferization of shape.assuming. +struct AssumingOpInterface + : public BufferizableOpInterface::ExternalModel { + SmallVector + getAliasingOpOperand(Operation *op, OpResult opResult, + const BufferizationState &state) const { + // AssumingOps do not have tensor OpOperands. The yielded value can be any + // SSA value that is in scope. To allow for use-def chain traversal through + // AssumingOps in the analysis, the corresponding yield value is considered + // to be aliasing with the result. + auto assumingOp = cast(op); + size_t resultNum = std::distance(op->getOpResults().begin(), + llvm::find(op->getOpResults(), opResult)); + // TODO: Support multiple blocks. 
+ assert(assumingOp.getDoRegion().getBlocks().size() == 1 && + "expected exactly 1 block"); + auto yieldOp = dyn_cast( + assumingOp.getDoRegion().front().getTerminator()); + assert(yieldOp && "expected shape.assuming_yield terminator"); + return {&yieldOp->getOpOperand(resultNum)}; + } + + // TODO: For better bufferization results, this could return `true` only if + // there is a memory write in the region. + bool isMemoryWrite(Operation *op, OpResult opResult, + const BufferizationState &state) const { + // Similar to scf.if, results of this op are always considered memory writes + // in the analysis. This is a useful pattern for all ops that have tensor + // OpResults but no tensor OpOperands. By default, `isMemoryWrite` is + // implemented in terms of `bufferizesToMemoryWrite`, which does not work on + // ops without OpOperands. + return true; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const BufferizationState &state) const { + auto assumingOp = cast(op); + + // Compute new result types. + SmallVector newResultTypes; + for (Type type : assumingOp->getResultTypes()) { + if (auto tensorType = type.dyn_cast()) { + newResultTypes.push_back(getMemRefType(tensorType, state.getOptions())); + } else { + newResultTypes.push_back(type); + } + } + + // Create new op and move over region. + auto newOp = rewriter.create( + op->getLoc(), newResultTypes, assumingOp.getWitness()); + newOp.getDoRegion().takeBody(assumingOp.getRegion()); + + // Update terminator. 
+ assert(newOp.getDoRegion().getBlocks().size() == 1 && + "only 1 block supported"); + Block *newBlock = &newOp.getDoRegion().front(); + auto yieldOp = cast(newBlock->getTerminator()); + rewriter.setInsertionPoint(yieldOp); + SmallVector newYieldValues; + for (const auto &it : llvm::enumerate(yieldOp.operands())) { + Value val = it.value(); + if (val.getType().isa()) { + newYieldValues.push_back(rewriter.create( + yieldOp.getLoc(), newResultTypes[it.index()], val)); + } else { + newYieldValues.push_back(val); + } + } + rewriter.replaceOpWithNewOp(yieldOp, + newYieldValues); + + // Update all uses of the old op. + rewriter.setInsertionPointAfter(newOp); + SmallVector newResults; + for (const auto &it : llvm::enumerate(assumingOp->getResultTypes())) { + if (it.value().isa()) { + newResults.push_back(rewriter.create( + assumingOp.getLoc(), newOp->getResult(it.index()))); + } else { + newResults.push_back(newOp->getResult(it.index())); + } + } + + // Replace old op. + rewriter.replaceOp(assumingOp, newResults); + + return success(); + } + + BufferRelation bufferRelation(Operation *op, OpResult opResult, + const BufferizationState &state) const { + return BufferRelation::Equivalent; + } +}; + +/// Bufferization of shape.assuming_yield. Bufferized as part of their enclosing +/// ops, so this is for analysis only. 
+struct AssumingYieldOpInterface + : public BufferizableOpInterface::ExternalModel { + bool bufferizesToMemoryRead(Operation *op, OpOperand &opOperand, + const BufferizationState &state) const { + return true; + } + + bool bufferizesToMemoryWrite(Operation *op, OpOperand &opOperand, + const BufferizationState &state) const { + return false; + } + + SmallVector + getAliasingOpResult(Operation *op, OpOperand &opOperand, + const BufferizationState &state) const { + assert(isa(op->getParentOp()) && + "expected that parent is an AssumingOp"); + return {op->getParentOp()->getResult(opOperand.getOperandNumber())}; + } + + bool mustBufferizeInPlace(Operation *op, OpOperand &opOperand, + const BufferizationState &state) const { + // Yield operands always bufferize inplace. Otherwise, an alloc + copy + // may be generated inside the block. We should not return/yield allocations + // when possible. + return true; + } + + LogicalResult bufferize(Operation *op, RewriterBase &rewriter, + const BufferizationState &state) const { + // Op is bufferized as part of AssumingOp. 
+ return failure(); + } +}; + +} // namespace +} // namespace shape +} // namespace mlir + +void mlir::shape::registerBufferizableOpInterfaceExternalModels( + DialectRegistry ®istry) { + registry.addOpInterface(); + registry.addOpInterface(); +} diff --git a/mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp b/mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp --- a/mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp +++ b/mlir/lib/Dialect/Shape/Transforms/Bufferize.cpp @@ -8,30 +8,32 @@ #include "mlir/Dialect/Bufferization/Transforms/Bufferize.h" #include "PassDetail.h" +#include "mlir/Dialect/Bufferization/IR/BufferizableOpInterface.h" #include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/MemRef/IR/MemRef.h" +#include "mlir/Dialect/Shape/IR/Shape.h" +#include "mlir/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.h" #include "mlir/Dialect/Shape/Transforms/Passes.h" #include "mlir/Pass/Pass.h" using namespace mlir; +using namespace bufferization; namespace { struct ShapeBufferizePass : public ShapeBufferizeBase { void runOnOperation() override { - MLIRContext &ctx = getContext(); + BufferizationOptions options = getPartialBufferizationOptions(); + options.allowDialectInFilter(); - RewritePatternSet patterns(&ctx); - bufferization::BufferizeTypeConverter typeConverter; - ConversionTarget target(ctx); - - bufferization::populateBufferizeMaterializationLegality(target); - populateShapeStructuralTypeConversionsAndLegality(typeConverter, patterns, - target); - - if (failed(applyPartialConversion(getOperation(), target, - std::move(patterns)))) + if (failed(bufferizeOp(getOperation(), options))) signalPassFailure(); } + + void getDependentDialects(DialectRegistry ®istry) const override { + registry.insert(); + shape::registerBufferizableOpInterfaceExternalModels(registry); + } }; } // namespace diff --git a/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt --- 
a/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Shape/Transforms/CMakeLists.txt @@ -1,8 +1,8 @@ add_mlir_dialect_library(MLIRShapeOpsTransforms + BufferizableOpInterfaceImpl.cpp Bufferize.cpp RemoveShapeConstraints.cpp ShapeToShapeLowering.cpp - StructuralTypeConversions.cpp ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/ShapeOps/Transforms @@ -14,6 +14,7 @@ target_link_libraries(MLIRShapeOpsTransforms PUBLIC MLIRArithmetic + MLIRBufferization MLIRBufferizationTransforms MLIRIR MLIRMemRef diff --git a/mlir/lib/Dialect/Shape/Transforms/StructuralTypeConversions.cpp b/mlir/lib/Dialect/Shape/Transforms/StructuralTypeConversions.cpp deleted file mode 100644 --- a/mlir/lib/Dialect/Shape/Transforms/StructuralTypeConversions.cpp +++ /dev/null @@ -1,70 +0,0 @@ -//===- StructuralTypeConversions.cpp - Shape structural type conversions --===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "PassDetail.h" -#include "mlir/Dialect/Shape/IR/Shape.h" -#include "mlir/Dialect/Shape/Transforms/Passes.h" -#include "mlir/Transforms/DialectConversion.h" - -using namespace mlir; -using namespace mlir::shape; - -namespace { -class ConvertAssumingOpTypes : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(AssumingOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const final { - SmallVector newResultTypes; - newResultTypes.reserve(op.getNumResults()); - for (auto result : op.getResults()) { - auto originalType = result.getType(); - Type convertedType = getTypeConverter()->convertType(originalType); - newResultTypes.push_back(convertedType); - } - - auto newAssumingOp = rewriter.create( - op.getLoc(), newResultTypes, op.getWitness()); - rewriter.inlineRegionBefore(op.getDoRegion(), newAssumingOp.getDoRegion(), - newAssumingOp.getDoRegion().end()); - rewriter.replaceOp(op, newAssumingOp.getResults()); - - return success(); - } -}; -} // namespace - -namespace { -class ConvertAssumingYieldOpTypes - : public OpConversionPattern { -public: - using OpConversionPattern::OpConversionPattern; - - LogicalResult - matchAndRewrite(AssumingYieldOp op, OpAdaptor adaptor, - ConversionPatternRewriter &rewriter) const final { - rewriter.replaceOpWithNewOp(op, adaptor.getOperands()); - return success(); - } -}; -} // namespace - -void mlir::populateShapeStructuralTypeConversionsAndLegality( - TypeConverter &typeConverter, RewritePatternSet &patterns, - ConversionTarget &target) { - patterns.add( - typeConverter, patterns.getContext()); - target.addDynamicallyLegalOp([&](AssumingOp op) { - return typeConverter.isLegal(op.getResultTypes()); - }); - target.addDynamicallyLegalOp([&](AssumingYieldOp op) { - return 
typeConverter.isLegal(op.getOperandTypes()); - }); -} diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp --- a/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/Sparsification.cpp @@ -52,10 +52,9 @@ indices(numTensors, std::vector(numLoops)), highs(numTensors, std::vector(numLoops)), pidxs(numTensors, std::vector(numLoops)), - idxs(numTensors, std::vector(numLoops)), redExp(-1u), redVal(), - redKind(kNoReduc), sparseOut(op), outerParNest(nest), lexIdx(), - expValues(), expFilled(), expAdded(), expCount(), curVecLength(1), - curVecMask() {} + idxs(numTensors, std::vector(numLoops)), redVal(), sparseOut(op), + outerParNest(nest), lexIdx(), expValues(), expFilled(), expAdded(), + expCount(), curVecMask() {} /// Sparsification options. SparsificationOptions options; /// Universal dense indices and upper bounds (by index). The loops array @@ -77,9 +76,9 @@ std::vector> idxs; /// Current reduction, updated during code generation. When indices of a /// reduction are exhausted, all inner loops can use a scalarized reduction. - unsigned redExp; + unsigned redExp = -1u; Value redVal; - Reduction redKind; + Reduction redKind = kNoReduc; // Sparse tensor as output. Implemented either through direct injective // insertion in lexicographic index order (where indices are updated // in the temporary array `lexIdx`) or through access pattern expansion @@ -92,7 +91,7 @@ Value expAdded; Value expCount; // Current vector length and mask. 
- unsigned curVecLength; + unsigned curVecLength = 1; Value curVecMask; }; diff --git a/mlir/lib/ExecutionEngine/JitRunner.cpp b/mlir/lib/ExecutionEngine/JitRunner.cpp --- a/mlir/lib/ExecutionEngine/JitRunner.cpp +++ b/mlir/lib/ExecutionEngine/JitRunner.cpp @@ -21,7 +21,7 @@ #include "mlir/ExecutionEngine/OptUtils.h" #include "mlir/IR/BuiltinTypes.h" #include "mlir/IR/MLIRContext.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include "mlir/Support/FileUtilities.h" #include "llvm/ADT/STLExtras.h" @@ -122,7 +122,7 @@ llvm::SourceMgr sourceMgr; sourceMgr.AddNewSourceBuffer(std::move(file), SMLoc()); - return OwningOpRef(parseSourceFile(sourceMgr, context)); + return parseSourceFile(sourceMgr, context); } static inline Error makeStringError(const Twine &message) { diff --git a/mlir/lib/IR/AffineExpr.cpp b/mlir/lib/IR/AffineExpr.cpp --- a/mlir/lib/IR/AffineExpr.cpp +++ b/mlir/lib/IR/AffineExpr.cpp @@ -328,9 +328,7 @@ "unexpected opKind"); switch (expr.getKind()) { case AffineExprKind::Constant: - if (expr.cast().getValue()) - return false; - return true; + return expr.cast().getValue() == 0; case AffineExprKind::DimId: return false; case AffineExprKind::SymbolId: diff --git a/mlir/lib/IR/AsmPrinter.cpp b/mlir/lib/IR/AsmPrinter.cpp --- a/mlir/lib/IR/AsmPrinter.cpp +++ b/mlir/lib/IR/AsmPrinter.cpp @@ -25,6 +25,7 @@ #include "mlir/IR/OpImplementation.h" #include "mlir/IR/Operation.h" #include "mlir/IR/SubElementInterfaces.h" +#include "mlir/IR/Verifier.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" @@ -40,6 +41,7 @@ #include "llvm/Support/Endian.h" #include "llvm/Support/Regex.h" #include "llvm/Support/SaveAndRestore.h" +#include "llvm/Support/Threading.h" #include @@ -141,6 +143,11 @@ "mlir-print-op-generic", llvm::cl::init(false), llvm::cl::desc("Print the generic op form"), llvm::cl::Hidden}; + llvm::cl::opt assumeVerifiedOpt{ + "mlir-print-assume-verified", llvm::cl::init(false), + llvm::cl::desc("Skip op 
verification when using custom printers"), + llvm::cl::Hidden}; + llvm::cl::opt printLocalScopeOpt{ "mlir-print-local-scope", llvm::cl::init(false), llvm::cl::desc("Print with local scope and inline information (eliding " @@ -160,7 +167,8 @@ /// Initialize the printing flags with default supplied by the cl::opts above. OpPrintingFlags::OpPrintingFlags() : printDebugInfoFlag(false), printDebugInfoPrettyFormFlag(false), - printGenericOpFormFlag(false), printLocalScope(false) { + printGenericOpFormFlag(false), assumeVerifiedFlag(false), + printLocalScope(false) { // Initialize based upon command line options, if they are available. if (!clOptions.isConstructed()) return; @@ -169,6 +177,7 @@ printDebugInfoFlag = clOptions->printDebugInfoOpt; printDebugInfoPrettyFormFlag = clOptions->printPrettyDebugInfoOpt; printGenericOpFormFlag = clOptions->printGenericOpFormOpt; + assumeVerifiedFlag = clOptions->assumeVerifiedOpt; printLocalScope = clOptions->printLocalScopeOpt; } @@ -196,6 +205,12 @@ return *this; } +/// Do not verify the operation when using custom operation printers. +OpPrintingFlags &OpPrintingFlags::assumeVerified() { + assumeVerifiedFlag = true; + return *this; +} + /// Use local scope when printing the operation. This allows for using the /// printer in a more localized and thread-safe setting, but may not necessarily /// be identical of what the IR will look like when dumping the full module. @@ -231,6 +246,11 @@ return printGenericOpFormFlag; } +/// Return if operation verification should be skipped. +bool OpPrintingFlags::shouldAssumeVerified() const { + return assumeVerifiedFlag; +} + /// Return if the printer should use local scope when dumping the IR. bool OpPrintingFlags::shouldUseLocalScope() const { return printLocalScope; } @@ -1245,9 +1265,31 @@ } // namespace detail } // namespace mlir +/// Verifies the operation and switches to generic op printing if verification +/// fails. 
We need to do this because custom print functions may fail for +/// invalid ops. +static OpPrintingFlags verifyOpAndAdjustFlags(Operation *op, + OpPrintingFlags printerFlags) { + if (printerFlags.shouldPrintGenericOpForm() || + printerFlags.shouldAssumeVerified()) + return printerFlags; + + // Ignore errors emitted by the verifier. We check the thread id to avoid + // consuming other threads' errors. + auto parentThreadId = llvm::get_threadid(); + ScopedDiagnosticHandler diagHandler(op->getContext(), [&](Diagnostic &) { + return success(parentThreadId == llvm::get_threadid()); + }); + if (failed(verify(op))) + printerFlags.printGenericOpForm(); + + return printerFlags; +} + AsmState::AsmState(Operation *op, const OpPrintingFlags &printerFlags, LocationMap *locationMap) - : impl(std::make_unique(op, printerFlags, locationMap)) {} + : impl(std::make_unique( + op, verifyOpAndAdjustFlags(op, printerFlags), locationMap)) {} AsmState::~AsmState() = default; const OpPrintingFlags &AsmState::getPrinterFlags() const { @@ -2853,14 +2895,15 @@ AsmPrinter::Impl(os).printIntegerSet(*this); } -void Value::print(raw_ostream &os) { +void Value::print(raw_ostream &os) { print(os, OpPrintingFlags()); } +void Value::print(raw_ostream &os, const OpPrintingFlags &flags) { if (!impl) { os << "<>"; return; } if (auto *op = getDefiningOp()) - return op->print(os); + return op->print(os, flags); // TODO: Improve BlockArgument print'ing. BlockArgument arg = this->cast(); os << " of type '" << arg.getType() diff --git a/mlir/lib/IR/Diagnostics.cpp b/mlir/lib/IR/Diagnostics.cpp --- a/mlir/lib/IR/Diagnostics.cpp +++ b/mlir/lib/IR/Diagnostics.cpp @@ -121,6 +121,17 @@ return *this; } +/// Adjusts operation printing flags used in diagnostics for the given severity +/// level. 
+static OpPrintingFlags adjustPrintingFlags(OpPrintingFlags flags, + DiagnosticSeverity severity) { + flags.useLocalScope(); + flags.elideLargeElementsAttrs(); + if (severity == DiagnosticSeverity::Error) + flags.printGenericOpForm(); + return flags; +} + /// Stream in an Operation. Diagnostic &Diagnostic::operator<<(Operation &val) { return appendOp(val, OpPrintingFlags()); @@ -128,8 +139,7 @@ Diagnostic &Diagnostic::appendOp(Operation &val, const OpPrintingFlags &flags) { std::string str; llvm::raw_string_ostream os(str); - val.print(os, - OpPrintingFlags(flags).useLocalScope().elideLargeElementsAttrs()); + val.print(os, adjustPrintingFlags(flags, severity)); return *this << os.str(); } @@ -137,7 +147,7 @@ Diagnostic &Diagnostic::operator<<(Value val) { std::string str; llvm::raw_string_ostream os(str); - val.print(os); + val.print(os, adjustPrintingFlags(OpPrintingFlags(), severity)); return *this << os.str(); } @@ -844,7 +854,7 @@ Diagnostic diag; }; - ParallelDiagnosticHandlerImpl(MLIRContext *ctx) : handlerID(0), context(ctx) { + ParallelDiagnosticHandlerImpl(MLIRContext *ctx) : context(ctx) { handlerID = ctx->getDiagEngine().registerHandler([this](Diagnostic &diag) { uint64_t tid = llvm::get_threadid(); llvm::sys::SmartScopedLock lock(mutex); @@ -942,7 +952,7 @@ mutable std::vector diagnostics; /// The unique id for the parallel handler. - DiagnosticEngine::HandlerID handlerID; + DiagnosticEngine::HandlerID handlerID = 0; /// The context to emit the diagnostics to. MLIRContext *context; diff --git a/mlir/lib/IR/Operation.cpp b/mlir/lib/IR/Operation.cpp --- a/mlir/lib/IR/Operation.cpp +++ b/mlir/lib/IR/Operation.cpp @@ -1097,6 +1097,8 @@ // Check that any value that is used by an operation is defined in the // same region as either an operation result. 
auto *operandRegion = operand.getParentRegion(); + if (!operandRegion) + return op.emitError("operation's operand is unlinked"); if (!region.isAncestor(operandRegion)) { return op.emitOpError("using value defined outside the region") .attachNote(isolatedOp->getLoc()) diff --git a/mlir/lib/Parser/AffineParser.cpp b/mlir/lib/Parser/AffineParser.cpp --- a/mlir/lib/Parser/AffineParser.cpp +++ b/mlir/lib/Parser/AffineParser.cpp @@ -48,7 +48,7 @@ AffineParser(ParserState &state, bool allowParsingSSAIds = false, function_ref parseElement = nullptr) : Parser(state), allowParsingSSAIds(allowParsingSSAIds), - parseElement(parseElement), numDimOperands(0), numSymbolOperands(0) {} + parseElement(parseElement) {} AffineMap parseAffineMapRange(unsigned numDims, unsigned numSymbols); ParseResult parseAffineMapOrIntegerSetInline(AffineMap &map, IntegerSet &set); @@ -92,8 +92,8 @@ private: bool allowParsingSSAIds; function_ref parseElement; - unsigned numDimOperands; - unsigned numSymbolOperands; + unsigned numDimOperands = 0; + unsigned numSymbolOperands = 0; SmallVector, 4> dimsAndSymbols; }; } // namespace diff --git a/mlir/lib/Parser/Lexer.h b/mlir/lib/Parser/Lexer.h --- a/mlir/lib/Parser/Lexer.h +++ b/mlir/lib/Parser/Lexer.h @@ -14,7 +14,7 @@ #define MLIR_LIB_PARSER_LEXER_H #include "Token.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" namespace mlir { class Location; diff --git a/mlir/lib/Parser/Parser.cpp b/mlir/lib/Parser/Parser.cpp --- a/mlir/lib/Parser/Parser.cpp +++ b/mlir/lib/Parser/Parser.cpp @@ -16,8 +16,8 @@ #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/Verifier.h" -#include "mlir/Parser.h" #include "mlir/Parser/AsmParserState.h" +#include "mlir/Parser/Parser.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/StringSet.h" diff --git a/mlir/lib/Pass/PassRegistry.cpp b/mlir/lib/Pass/PassRegistry.cpp --- a/mlir/lib/Pass/PassRegistry.cpp +++ b/mlir/lib/Pass/PassRegistry.cpp @@ -326,11 
+326,11 @@ /// the name is the name of a pass, the InnerPipeline is empty, since passes /// cannot contain inner pipelines. struct PipelineElement { - PipelineElement(StringRef name) : name(name), registryEntry(nullptr) {} + PipelineElement(StringRef name) : name(name) {} StringRef name; StringRef options; - const PassRegistryEntry *registryEntry; + const PassRegistryEntry *registryEntry = nullptr; std::vector innerPipeline; }; diff --git a/mlir/lib/Support/CMakeLists.txt b/mlir/lib/Support/CMakeLists.txt --- a/mlir/lib/Support/CMakeLists.txt +++ b/mlir/lib/Support/CMakeLists.txt @@ -3,7 +3,6 @@ FileUtilities.cpp IndentedOstream.cpp InterfaceSupport.cpp - MlirOptMain.cpp StorageUniquer.cpp Timing.cpp ToolUtilities.cpp @@ -24,18 +23,6 @@ LINK_LIBS PUBLIC ${LLVM_PTHREAD_LIB}) -add_mlir_library(MLIROptLib - MlirOptMain.cpp - - ADDITIONAL_HEADER_DIRS - ${MLIR_MAIN_INCLUDE_DIR}/mlir/Support - - LINK_LIBS PUBLIC - MLIRPass - MLIRParser - MLIRSupport - ) - # This doesn't use add_mlir_library as it is used in mlir-tblgen and else # mlir-tblgen ends up depending on mlir-generic-headers. 
add_llvm_library(MLIRSupportIndentedOstream diff --git a/mlir/lib/Target/Cpp/CMakeLists.txt b/mlir/lib/Target/Cpp/CMakeLists.txt --- a/mlir/lib/Target/Cpp/CMakeLists.txt +++ b/mlir/lib/Target/Cpp/CMakeLists.txt @@ -14,5 +14,5 @@ MLIRMath MLIRSCF MLIRSupport - MLIRTranslation + MLIRTranslateLib ) diff --git a/mlir/lib/Target/Cpp/TranslateRegistration.cpp b/mlir/lib/Target/Cpp/TranslateRegistration.cpp --- a/mlir/lib/Target/Cpp/TranslateRegistration.cpp +++ b/mlir/lib/Target/Cpp/TranslateRegistration.cpp @@ -15,7 +15,7 @@ #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Dialect.h" #include "mlir/Target/Cpp/CppEmitter.h" -#include "mlir/Translation.h" +#include "mlir/Tools/mlir-translate/Translation.h" #include "llvm/Support/CommandLine.h" using namespace mlir; diff --git a/mlir/lib/Target/LLVMIR/CMakeLists.txt b/mlir/lib/Target/LLVMIR/CMakeLists.txt --- a/mlir/lib/Target/LLVMIR/CMakeLists.txt +++ b/mlir/lib/Target/LLVMIR/CMakeLists.txt @@ -30,7 +30,7 @@ MLIRDLTI MLIRLLVMIR MLIRLLVMIRTransforms - MLIRTranslation + MLIRTranslateLib ) add_mlir_translation_library(MLIRToLLVMIRTranslationRegistration @@ -62,5 +62,5 @@ LINK_LIBS PUBLIC MLIRDLTI MLIRLLVMIR - MLIRTranslation + MLIRTranslateLib ) diff --git a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp --- a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp +++ b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp @@ -20,7 +20,7 @@ #include "mlir/IR/MLIRContext.h" #include "mlir/Interfaces/DataLayoutInterfaces.h" #include "mlir/Target/LLVMIR/TypeFromLLVM.h" -#include "mlir/Translation.h" +#include "mlir/Tools/mlir-translate/Translation.h" #include "llvm/ADT/StringSet.h" #include "llvm/ADT/TypeSwitch.h" diff --git a/mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp --- a/mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp +++ b/mlir/lib/Target/LLVMIR/ConvertToLLVMIR.cpp @@ -14,7 +14,7 @@ #include "mlir/IR/BuiltinOps.h" #include "mlir/Target/LLVMIR/Dialect/All.h" 
#include "mlir/Target/LLVMIR/Export.h" -#include "mlir/Translation.h" +#include "mlir/Tools/mlir-translate/Translation.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" diff --git a/mlir/lib/Target/SPIRV/CMakeLists.txt b/mlir/lib/Target/SPIRV/CMakeLists.txt --- a/mlir/lib/Target/SPIRV/CMakeLists.txt +++ b/mlir/lib/Target/SPIRV/CMakeLists.txt @@ -24,5 +24,5 @@ MLIRSPIRVSerialization MLIRSPIRVDeserialization MLIRSupport - MLIRTranslation + MLIRTranslateLib ) diff --git a/mlir/lib/Target/SPIRV/Deserialization/CMakeLists.txt b/mlir/lib/Target/SPIRV/Deserialization/CMakeLists.txt --- a/mlir/lib/Target/SPIRV/Deserialization/CMakeLists.txt +++ b/mlir/lib/Target/SPIRV/Deserialization/CMakeLists.txt @@ -11,7 +11,7 @@ MLIRSPIRV MLIRSPIRVBinaryUtils MLIRSupport - MLIRTranslation + MLIRTranslateLib ) diff --git a/mlir/lib/Target/SPIRV/Serialization/CMakeLists.txt b/mlir/lib/Target/SPIRV/Serialization/CMakeLists.txt --- a/mlir/lib/Target/SPIRV/Serialization/CMakeLists.txt +++ b/mlir/lib/Target/SPIRV/Serialization/CMakeLists.txt @@ -11,7 +11,7 @@ MLIRSPIRV MLIRSPIRVBinaryUtils MLIRSupport - MLIRTranslation + MLIRTranslateLib ) diff --git a/mlir/lib/Target/SPIRV/TranslateRegistration.cpp b/mlir/lib/Target/SPIRV/TranslateRegistration.cpp --- a/mlir/lib/Target/SPIRV/TranslateRegistration.cpp +++ b/mlir/lib/Target/SPIRV/TranslateRegistration.cpp @@ -17,11 +17,11 @@ #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/Verifier.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include "mlir/Support/FileUtilities.h" #include "mlir/Target/SPIRV/Deserialization.h" #include "mlir/Target/SPIRV/Serialization.h" -#include "mlir/Translation.h" +#include "mlir/Tools/mlir-translate/Translation.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SMLoc.h" diff --git a/mlir/lib/Tools/CMakeLists.txt b/mlir/lib/Tools/CMakeLists.txt --- a/mlir/lib/Tools/CMakeLists.txt +++ b/mlir/lib/Tools/CMakeLists.txt 
@@ -1,3 +1,5 @@ add_subdirectory(mlir-lsp-server) +add_subdirectory(mlir-opt) add_subdirectory(mlir-reduce) +add_subdirectory(mlir-translate) add_subdirectory(PDLL) diff --git a/mlir/lib/Tools/PDLL/CodeGen/MLIRGen.cpp b/mlir/lib/Tools/PDLL/CodeGen/MLIRGen.cpp --- a/mlir/lib/Tools/PDLL/CodeGen/MLIRGen.cpp +++ b/mlir/lib/Tools/PDLL/CodeGen/MLIRGen.cpp @@ -13,7 +13,7 @@ #include "mlir/IR/Builders.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Verifier.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include "mlir/Tools/PDLL/AST/Context.h" #include "mlir/Tools/PDLL/AST/Nodes.h" #include "mlir/Tools/PDLL/AST/Types.h" diff --git a/mlir/lib/Tools/PDLL/Parser/Parser.cpp b/mlir/lib/Tools/PDLL/Parser/Parser.cpp --- a/mlir/lib/Tools/PDLL/Parser/Parser.cpp +++ b/mlir/lib/Tools/PDLL/Parser/Parser.cpp @@ -43,8 +43,7 @@ public: Parser(ast::Context &ctx, llvm::SourceMgr &sourceMgr) : ctx(ctx), lexer(sourceMgr, ctx.getDiagEngine()), - curToken(lexer.lexToken()), curDeclScope(nullptr), - valueTy(ast::ValueType::get(ctx)), + curToken(lexer.lexToken()), valueTy(ast::ValueType::get(ctx)), valueRangeTy(ast::ValueRangeType::get(ctx)), typeTy(ast::TypeType::get(ctx)), typeRangeTy(ast::TypeRangeType::get(ctx)), @@ -469,7 +468,7 @@ Token curToken; /// The most recently defined decl scope. - ast::DeclScope *curDeclScope; + ast::DeclScope *curDeclScope = nullptr; llvm::SpecificBumpPtrAllocator scopeAllocator; /// The current context of the parser. 
diff --git a/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp b/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp --- a/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp +++ b/mlir/lib/Tools/mlir-lsp-server/MLIRServer.cpp @@ -10,8 +10,8 @@ #include "lsp/Logging.h" #include "lsp/Protocol.h" #include "mlir/IR/Operation.h" -#include "mlir/Parser.h" #include "mlir/Parser/AsmParserState.h" +#include "mlir/Parser/Parser.h" #include "llvm/Support/SourceMgr.h" using namespace mlir; @@ -716,7 +716,7 @@ int64_t version; /// The number of lines in the file. - int64_t totalNumLines; + int64_t totalNumLines = 0; /// The chunks of this file. The order of these chunks is the order in which /// they appear in the text file. @@ -728,7 +728,7 @@ int64_t version, DialectRegistry ®istry, std::vector &diagnostics) : context(registry, MLIRContext::Threading::DISABLED), - contents(fileContents.str()), version(version), totalNumLines(0) { + contents(fileContents.str()), version(version) { context.allowUnregisteredDialects(); // Split the file into separate MLIR documents. 
diff --git a/mlir/lib/Tools/mlir-opt/CMakeLists.txt b/mlir/lib/Tools/mlir-opt/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/mlir/lib/Tools/mlir-opt/CMakeLists.txt @@ -0,0 +1,11 @@ +add_mlir_library(MLIROptLib + MlirOptMain.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Tools/mlir-opt + + LINK_LIBS PUBLIC + MLIRPass + MLIRParser + MLIRSupport + ) diff --git a/mlir/lib/Support/MlirOptMain.cpp b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp rename from mlir/lib/Support/MlirOptMain.cpp rename to mlir/lib/Tools/mlir-opt/MlirOptMain.cpp --- a/mlir/lib/Support/MlirOptMain.cpp +++ b/mlir/lib/Tools/mlir-opt/MlirOptMain.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Support/MlirOptMain.h" +#include "mlir/Tools/mlir-opt/MlirOptMain.h" #include "mlir/IR/AsmState.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/BuiltinOps.h" @@ -19,7 +19,7 @@ #include "mlir/IR/Dialect.h" #include "mlir/IR/Location.h" #include "mlir/IR/MLIRContext.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Support/DebugCounter.h" @@ -59,7 +59,7 @@ // Parse the input file and reset the context threading state. 
TimingScope parserTiming = timing.nest("Parser"); - OwningOpRef module(parseSourceFile(sourceMgr, context)); + OwningOpRef module(parseSourceFile(sourceMgr, context)); context->enableMultithreading(wasThreadingEnabled); if (!module) return failure(); diff --git a/mlir/lib/Tools/mlir-reduce/MlirReduceMain.cpp b/mlir/lib/Tools/mlir-reduce/MlirReduceMain.cpp --- a/mlir/lib/Tools/mlir-reduce/MlirReduceMain.cpp +++ b/mlir/lib/Tools/mlir-reduce/MlirReduceMain.cpp @@ -15,7 +15,7 @@ #include "mlir/Tools/mlir-reduce/MlirReduceMain.h" #include "mlir/IR/PatternMatch.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Reducer/Passes.h" @@ -31,7 +31,7 @@ static LogicalResult loadModule(MLIRContext &context, OwningOpRef &module, StringRef inputFilename) { - module = parseSourceFile(inputFilename, &context); + module = parseSourceFile(inputFilename, &context); if (!module) return failure(); diff --git a/mlir/lib/Tools/mlir-translate/CMakeLists.txt b/mlir/lib/Tools/mlir-translate/CMakeLists.txt new file mode 100644 --- /dev/null +++ b/mlir/lib/Tools/mlir-translate/CMakeLists.txt @@ -0,0 +1,11 @@ +add_mlir_library(MLIRTranslateLib + MlirTranslateMain.cpp + Translation.cpp + + ADDITIONAL_HEADER_DIRS + ${MLIR_MAIN_INCLUDE_DIR}/mlir/Tools/mlir-translate + + LINK_LIBS PUBLIC + MLIRIR + MLIRParser + ) diff --git a/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp b/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp new file mode 100644 --- /dev/null +++ b/mlir/lib/Tools/mlir-translate/MlirTranslateMain.cpp @@ -0,0 +1,111 @@ +//===- MlirTranslateMain.cpp - MLIR Translation entry point ---------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "mlir/Tools/mlir-translate/MlirTranslateMain.h" +#include "mlir/IR/AsmState.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/Dialect.h" +#include "mlir/IR/Verifier.h" +#include "mlir/Parser/Parser.h" +#include "mlir/Support/FileUtilities.h" +#include "mlir/Support/ToolUtilities.h" +#include "mlir/Tools/mlir-translate/Translation.h" +#include "llvm/Support/InitLLVM.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/ToolOutputFile.h" + +using namespace mlir; + +//===----------------------------------------------------------------------===// +// Translation Parser +//===----------------------------------------------------------------------===// + +LogicalResult mlir::mlirTranslateMain(int argc, char **argv, + llvm::StringRef toolName) { + + static llvm::cl::opt inputFilename( + llvm::cl::Positional, llvm::cl::desc(""), + llvm::cl::init("-")); + + static llvm::cl::opt outputFilename( + "o", llvm::cl::desc("Output filename"), llvm::cl::value_desc("filename"), + llvm::cl::init("-")); + + static llvm::cl::opt allowUnregisteredDialects( + "allow-unregistered-dialect", + llvm::cl::desc("Allow operation with no registered dialects"), + llvm::cl::init(false)); + + static llvm::cl::opt splitInputFile( + "split-input-file", + llvm::cl::desc("Split the input file into pieces and " + "process each chunk independently"), + llvm::cl::init(false)); + + static llvm::cl::opt verifyDiagnostics( + "verify-diagnostics", + llvm::cl::desc("Check that emitted diagnostics match " + "expected-* lines on the corresponding line"), + llvm::cl::init(false)); + + llvm::InitLLVM y(argc, argv); + + // Add flags for all the registered translations. 
+ llvm::cl::opt + translationRequested("", llvm::cl::desc("Translation to perform"), + llvm::cl::Required); + registerAsmPrinterCLOptions(); + registerMLIRContextCLOptions(); + llvm::cl::ParseCommandLineOptions(argc, argv, toolName); + + std::string errorMessage; + auto input = openInputFile(inputFilename, &errorMessage); + if (!input) { + llvm::errs() << errorMessage << "\n"; + return failure(); + } + + auto output = openOutputFile(outputFilename, &errorMessage); + if (!output) { + llvm::errs() << errorMessage << "\n"; + return failure(); + } + + // Processes the memory buffer with a new MLIRContext. + auto processBuffer = [&](std::unique_ptr ownedBuffer, + raw_ostream &os) { + MLIRContext context; + context.allowUnregisteredDialects(allowUnregisteredDialects); + context.printOpOnDiagnostic(!verifyDiagnostics); + llvm::SourceMgr sourceMgr; + sourceMgr.AddNewSourceBuffer(std::move(ownedBuffer), SMLoc()); + + if (!verifyDiagnostics) { + SourceMgrDiagnosticHandler sourceMgrHandler(sourceMgr, &context); + return (*translationRequested)(sourceMgr, os, &context); + } + + // In the diagnostic verification flow, we ignore whether the translation + // failed (in most cases, it is expected to fail). Instead, we check if the + // diagnostics were produced as expected. 
+ SourceMgrDiagnosticVerifierHandler sourceMgrHandler(sourceMgr, &context); + (void)(*translationRequested)(sourceMgr, os, &context); + return sourceMgrHandler.verify(); + }; + + if (splitInputFile) { + if (failed(splitAndProcessBuffer(std::move(input), processBuffer, + output->os()))) + return failure(); + } else if (failed(processBuffer(std::move(input), output->os()))) { + return failure(); + } + + output->keep(); + return success(); +} diff --git a/mlir/lib/Translation/Translation.cpp b/mlir/lib/Tools/mlir-translate/Translation.cpp rename from mlir/lib/Translation/Translation.cpp rename to mlir/lib/Tools/mlir-translate/Translation.cpp --- a/mlir/lib/Translation/Translation.cpp +++ b/mlir/lib/Tools/mlir-translate/Translation.cpp @@ -10,17 +10,13 @@ // //===----------------------------------------------------------------------===// -#include "mlir/Translation.h" +#include "mlir/Tools/mlir-translate/Translation.h" #include "mlir/IR/AsmState.h" #include "mlir/IR/BuiltinOps.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/Verifier.h" -#include "mlir/Parser.h" -#include "mlir/Support/FileUtilities.h" -#include "mlir/Support/ToolUtilities.h" -#include "llvm/Support/InitLLVM.h" +#include "mlir/Parser/Parser.h" #include "llvm/Support/SourceMgr.h" -#include "llvm/Support/ToolOutputFile.h" using namespace mlir; @@ -101,7 +97,7 @@ DialectRegistry registry; dialectRegistration(registry); context->appendDialectRegistry(registry); - auto module = OwningOpRef(parseSourceFile(sourceMgr, context)); + auto module = parseSourceFile(sourceMgr, context); if (!module || failed(verify(*module))) return failure(); return function(module.get(), output); @@ -128,88 +124,3 @@ }); llvm::cl::parser::printOptionInfo(o, globalWidth); } - -LogicalResult mlir::mlirTranslateMain(int argc, char **argv, - llvm::StringRef toolName) { - - static llvm::cl::opt inputFilename( - llvm::cl::Positional, llvm::cl::desc(""), - llvm::cl::init("-")); - - static llvm::cl::opt outputFilename( - "o", 
llvm::cl::desc("Output filename"), llvm::cl::value_desc("filename"), - llvm::cl::init("-")); - - static llvm::cl::opt allowUnregisteredDialects( - "allow-unregistered-dialect", - llvm::cl::desc("Allow operation with no registered dialects"), - llvm::cl::init(false)); - - static llvm::cl::opt splitInputFile( - "split-input-file", - llvm::cl::desc("Split the input file into pieces and " - "process each chunk independently"), - llvm::cl::init(false)); - - static llvm::cl::opt verifyDiagnostics( - "verify-diagnostics", - llvm::cl::desc("Check that emitted diagnostics match " - "expected-* lines on the corresponding line"), - llvm::cl::init(false)); - - llvm::InitLLVM y(argc, argv); - - // Add flags for all the registered translations. - llvm::cl::opt - translationRequested("", llvm::cl::desc("Translation to perform"), - llvm::cl::Required); - registerAsmPrinterCLOptions(); - registerMLIRContextCLOptions(); - llvm::cl::ParseCommandLineOptions(argc, argv, toolName); - - std::string errorMessage; - auto input = openInputFile(inputFilename, &errorMessage); - if (!input) { - llvm::errs() << errorMessage << "\n"; - return failure(); - } - - auto output = openOutputFile(outputFilename, &errorMessage); - if (!output) { - llvm::errs() << errorMessage << "\n"; - return failure(); - } - - // Processes the memory buffer with a new MLIRContext. - auto processBuffer = [&](std::unique_ptr ownedBuffer, - raw_ostream &os) { - MLIRContext context; - context.allowUnregisteredDialects(allowUnregisteredDialects); - context.printOpOnDiagnostic(!verifyDiagnostics); - llvm::SourceMgr sourceMgr; - sourceMgr.AddNewSourceBuffer(std::move(ownedBuffer), SMLoc()); - - if (!verifyDiagnostics) { - SourceMgrDiagnosticHandler sourceMgrHandler(sourceMgr, &context); - return (*translationRequested)(sourceMgr, os, &context); - } - - // In the diagnostic verification flow, we ignore whether the translation - // failed (in most cases, it is expected to fail). 
Instead, we check if the - // diagnostics were produced as expected. - SourceMgrDiagnosticVerifierHandler sourceMgrHandler(sourceMgr, &context); - (void)(*translationRequested)(sourceMgr, os, &context); - return sourceMgrHandler.verify(); - }; - - if (splitInputFile) { - if (failed(splitAndProcessBuffer(std::move(input), processBuffer, - output->os()))) - return failure(); - } else if (failed(processBuffer(std::move(input), output->os()))) { - return failure(); - } - - output->keep(); - return success(); -} diff --git a/mlir/lib/Transforms/CSE.cpp b/mlir/lib/Transforms/CSE.cpp --- a/mlir/lib/Transforms/CSE.cpp +++ b/mlir/lib/Transforms/CSE.cpp @@ -63,8 +63,7 @@ /// Represents a single entry in the depth first traversal of a CFG. struct CFGStackNode { CFGStackNode(ScopedMapTy &knownValues, DominanceInfoNode *node) - : scope(knownValues), node(node), childIterator(node->begin()), - processed(false) {} + : scope(knownValues), node(node), childIterator(node->begin()) {} /// Scope for the known values. ScopedMapTy::ScopeTy scope; @@ -73,7 +72,7 @@ DominanceInfoNode::const_iterator childIterator; /// If this node has been fully processed yet or not. - bool processed; + bool processed = false; }; /// Attempt to eliminate a redundant operation. Returns success if the diff --git a/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp b/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp --- a/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp +++ b/mlir/lib/Transforms/Utils/ControlFlowSinkUtils.cpp @@ -35,8 +35,7 @@ /// Create an operation sinker with given dominance info. Sinker(function_ref shouldMoveIntoRegion, DominanceInfo &domInfo) - : shouldMoveIntoRegion(shouldMoveIntoRegion), domInfo(domInfo), - numSunk(0) {} + : shouldMoveIntoRegion(shouldMoveIntoRegion), domInfo(domInfo) {} /// Given a list of regions, find operations to sink and sink them. Return the /// number of operations sunk. 
@@ -65,7 +64,7 @@ /// Dominance info to determine op user dominance with respect to regions. DominanceInfo &domInfo; /// The number of operations sunk. - size_t numSunk; + size_t numSunk = 0; }; } // end anonymous namespace diff --git a/mlir/lib/Translation/CMakeLists.txt b/mlir/lib/Translation/CMakeLists.txt deleted file mode 100644 --- a/mlir/lib/Translation/CMakeLists.txt +++ /dev/null @@ -1,10 +0,0 @@ -add_mlir_library(MLIRTranslation - Translation.cpp - - ADDITIONAL_HEADER_DIRS - ${MLIR_MAIN_INCLUDE_DIR}/mlir/Translation - - LINK_LIBS PUBLIC - MLIRIR - MLIRParser - ) diff --git a/mlir/test/Dialect/Affine/affine-loop-normalize.mlir b/mlir/test/Dialect/Affine/affine-loop-normalize.mlir --- a/mlir/test/Dialect/Affine/affine-loop-normalize.mlir +++ b/mlir/test/Dialect/Affine/affine-loop-normalize.mlir @@ -26,6 +26,16 @@ // ----- +// CHECK-LABEL: func @relative_bounds +func @relative_bounds(%arg: index) { + // CHECK: affine.for %{{.*}} = 0 to 4 + affine.for %i = affine_map<(d0) -> (d0)>(%arg) to affine_map<(d0) -> (d0 + 4)>(%arg) { + } + return +} + +// ----- + // Check that single iteration loop is removed and its body is promoted to the // parent block. 
@@ -103,7 +113,7 @@ // CHECK-DAG: [[$OUTERIV:#map[0-9]+]] = affine_map<(d0) -> (d0 * 32 + 2)> // CHECK-DAG: [[$INNERIV:#map[0-9]+]] = affine_map<(d0) -> (d0 + 2)> // CHECK-DAG: [[$OUTERUB:#map[0-9]+]] = affine_map<()[s0] -> ((s0 - 2) ceildiv 32)> -// CHECK-DAG: [[$INNERUB:#map[0-9]+]] = affine_map<(d0) -> (d0 - 2, 510)> +// CHECK-DAG: [[$INNERUB:#map[0-9]+]] = affine_map<()[s0] -> (s0 - 2, 510)> // CHECK-LABEL: func @loop_with_multiple_upper_bounds // CHECK-SAME: (%[[ARG0:.*]]: memref, %[[ARG1:.*]]: index) @@ -111,7 +121,7 @@ // CHECK-NEXT: %[[DIM:.*]] = memref.dim %arg0, %c0 : memref // CHECK-NEXT: affine.for %[[I:.*]] = 0 to [[$OUTERUB]]()[%[[DIM]]] { // CHECK-NEXT: %[[IIV:.*]] = affine.apply [[$OUTERIV]](%[[I]]) -// CHECK-NEXT: affine.for %[[II:.*]] = 0 to min [[$INNERUB]](%[[ARG1]]) { +// CHECK-NEXT: affine.for %[[II:.*]] = 0 to min [[$INNERUB]]()[%[[ARG1]]] { // CHECK-NEXT: %[[IIIV:.*]] = affine.apply [[$INNERIV]](%[[II]]) // CHECK-NEXT: "test.foo"(%[[IIV]], %[[IIIV]]) // CHECK-NEXT: } @@ -133,7 +143,7 @@ // CHECK-DAG: [[$INTERUB:#map[0-9]+]] = affine_map<()[s0] -> (s0 ceildiv 32)> // CHECK-DAG: [[$INTERIV:#map[0-9]+]] = affine_map<(d0) -> (d0 * 32)> -// CHECK-DAG: [[$INTRAUB:#map[0-9]+]] = affine_map<(d0, d1)[s0] -> (32, -d0 + s0)> +// CHECK-DAG: [[$INTRAUB:#map[0-9]+]] = affine_map<(d0)[s0] -> (32, -d0 + s0)> // CHECK-DAG: [[$INTRAIV:#map[0-9]+]] = affine_map<(d0, d1) -> (d1 + d0)> // CHECK-LABEL: func @tiled_matmul @@ -149,11 +159,11 @@ // CHECK-NEXT: %[[JIV:.*]] = affine.apply [[$INTERIV]](%[[J]]) // CHECK-NEXT: affine.for %[[K:.*]] = 0 to [[$INTERUB]]()[%[[DIM2]]] { // CHECK-NEXT: %[[KIV:.*]] = affine.apply [[$INTERIV]](%[[K]]) -// CHECK-NEXT: affine.for %[[II:.*]] = 0 to min [[$INTRAUB]](%[[IIV]], %[[IIV]])[%[[DIM0]]] { +// CHECK-NEXT: affine.for %[[II:.*]] = 0 to min [[$INTRAUB]](%[[IIV]])[%[[DIM0]]] { // CHECK-NEXT: %[[IIIV:.*]] = affine.apply [[$INTRAIV]](%[[IIV]], %[[II]]) -// CHECK-NEXT: affine.for %[[JJ:.*]] = 0 to min [[$INTRAUB]](%[[JIV]], 
%[[JIV]])[%[[DIM1]]] { +// CHECK-NEXT: affine.for %[[JJ:.*]] = 0 to min [[$INTRAUB]](%[[JIV]])[%[[DIM1]]] { // CHECK-NEXT: %[[JJIV:.*]] = affine.apply [[$INTRAIV]](%[[JIV]], %[[JJ]]) -// CHECK-NEXT: affine.for %[[KK:.*]] = 0 to min [[$INTRAUB]](%[[KIV]], %[[KIV]])[%[[DIM2]]] { +// CHECK-NEXT: affine.for %[[KK:.*]] = 0 to min [[$INTRAUB]](%[[KIV]])[%[[DIM2]]] { // CHECK-NEXT: %[[KKIV:.*]] = affine.apply [[$INTRAIV]](%[[KIV]], %[[KK]]) // CHECK-NEXT: %{{.*}} = affine.load %[[ARG0]][%[[IIIV]], %[[KKIV]]] : memref<1024x1024xf32> // CHECK-NEXT: %{{.*}} = affine.load %[[ARG1]][%[[KKIV]], %[[JJIV]]] : memref<1024x1024xf32> diff --git a/mlir/test/IR/print-ir-invalid.mlir b/mlir/test/IR/print-ir-invalid.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/IR/print-ir-invalid.mlir @@ -0,0 +1,33 @@ +// # RUN: mlir-opt -test-print-invalid %s | FileCheck %s +// # RUN: mlir-opt -test-print-invalid %s --mlir-print-assume-verified | FileCheck %s --check-prefix=ASSUME-VERIFIED + +// The pass creates some ops and prints them to stdout, the input is just an +// empty module. +module {} + +// The operation is invalid because the body does not have a terminator, print +// the generic form. +// CHECK: Invalid operation: +// CHECK-NEXT: "builtin.func"() ({ +// CHECK-NEXT: ^bb0: +// CHECK-NEXT: }) +// CHECK-SAME: sym_name = "test" + +// The operation is valid because the body has a terminator, print the custom +// form. +// CHECK: Valid operation: +// CHECK-NEXT: func @test() { +// CHECK-NEXT: return +// CHECK-NEXT: } + +// With --mlir-print-assume-verified the custom form is printed in both cases. +// This works in this particular case, but may crash in general. 
+ +// ASSUME-VERIFIED: Invalid operation: +// ASSUME-VERIFIED-NEXT: func @test() { +// ASSUME-VERIFIED-NEXT: } + +// ASSUME-VERIFIED: Valid operation: +// ASSUME-VERIFIED-NEXT: func @test() { +// ASSUME-VERIFIED-NEXT: return +// ASSUME-VERIFIED-NEXT: } diff --git a/mlir/test/Target/LLVMIR/openmp-nested.mlir b/mlir/test/Target/LLVMIR/openmp-nested.mlir new file mode 100644 --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-nested.mlir @@ -0,0 +1,41 @@ +// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s + +module { + llvm.func @printf(!llvm.ptr, ...) -> i32 + llvm.mlir.global internal constant @str0("WG size of kernel = %d X %d\0A\00") + + llvm.func @main(%arg0: i32, %arg1: !llvm.ptr>) -> i32 { + omp.parallel { + %0 = llvm.mlir.constant(1 : index) : i64 + %1 = llvm.mlir.constant(10 : index) : i64 + %2 = llvm.mlir.constant(0 : index) : i64 + %4 = llvm.mlir.constant(0 : i32) : i32 + %12 = llvm.alloca %0 x i64 : (i64) -> !llvm.ptr + omp.wsloop (%arg2) : i64 = (%2) to (%1) step (%0) { + omp.parallel { + omp.wsloop (%arg3) : i64 = (%2) to (%0) step (%0) { + llvm.store %2, %12 : !llvm.ptr + omp.yield + } + omp.terminator + } + %19 = llvm.load %12 : !llvm.ptr + %20 = llvm.trunc %19 : i64 to i32 + %5 = llvm.mlir.addressof @str0 : !llvm.ptr> + %6 = llvm.getelementptr %5[%4, %4] : (!llvm.ptr>, i32, i32) -> !llvm.ptr + %21 = llvm.call @printf(%6, %20, %20) : (!llvm.ptr, i32, i32) -> i32 + omp.yield + } + omp.terminator + } + %a4 = llvm.mlir.constant(0 : i32) : i32 + llvm.return %a4 : i32 + } + +} + +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @1, i32 0, void (i32*, i32*, ...)* bitcast (void (i32*, i32*)* @[[inner1:.+]] to void (i32*, i32*, ...)*)) + +// CHECK: define internal void @[[inner1]] +// CHECK: %[[structArg:.+]] = alloca { i64* } +// CHECK: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @3, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, { i64* }*)* @[[inner2:.+]] to void (i32*, i32*, ...)*), { i64* }* %[[structArg]]) diff --git a/mlir/test/lib/Dialect/Tensor/TestTensorTransforms.cpp b/mlir/test/lib/Dialect/Tensor/TestTensorTransforms.cpp --- a/mlir/test/lib/Dialect/Tensor/TestTensorTransforms.cpp +++ b/mlir/test/lib/Dialect/Tensor/TestTensorTransforms.cpp @@ -65,10 +65,7 @@ auto resultType = op.result().getType().cast(); constexpr int64_t kConstantFoldingMaxNumElements = 1024; - if (resultType.getNumElements() > kConstantFoldingMaxNumElements) - return false; - - return true; + return resultType.getNumElements() <= kConstantFoldingMaxNumElements; }; tensor::populateFoldConstantExtractSlicePatterns(patterns, controlFn); diff --git a/mlir/test/lib/IR/CMakeLists.txt b/mlir/test/lib/IR/CMakeLists.txt --- a/mlir/test/lib/IR/CMakeLists.txt +++ b/mlir/test/lib/IR/CMakeLists.txt @@ -9,6 +9,7 @@ TestOpaqueLoc.cpp TestOperationEquals.cpp TestPrintDefUse.cpp + TestPrintInvalid.cpp TestPrintNesting.cpp TestSideEffects.cpp TestSlicing.cpp diff --git a/mlir/test/lib/IR/TestPrintInvalid.cpp b/mlir/test/lib/IR/TestPrintInvalid.cpp new file mode 100644 --- /dev/null +++ b/mlir/test/lib/IR/TestPrintInvalid.cpp @@ -0,0 +1,52 @@ +//===- TestPrintInvalid.cpp - Test printing invalid ops -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass creates and prints to the standard output an invalid operation and +// a valid operation. 
+// +//===----------------------------------------------------------------------===// + +#include "mlir/Dialect/Func/IR/FuncOps.h" +#include "mlir/Pass/Pass.h" +#include "llvm/Support/raw_ostream.h" + +using namespace mlir; + +namespace { +struct TestPrintInvalidPass + : public PassWrapper> { + StringRef getArgument() const final { return "test-print-invalid"; } + StringRef getDescription() const final { + return "Test printing invalid ops."; + } + void getDependentDialects(DialectRegistry ®istry) const { + registry.insert(); + } + + void runOnOperation() override { + Location loc = getOperation().getLoc(); + OpBuilder builder(getOperation().body()); + auto funcOp = builder.create( + loc, "test", FunctionType::get(getOperation().getContext(), {}, {})); + funcOp.addEntryBlock(); + // The created function is invalid because there is no return op. + llvm::outs() << "Invalid operation:\n" << funcOp << "\n"; + builder.setInsertionPointToEnd(&funcOp.getBody().front()); + builder.create(loc); + // Now this function is valid. 
+ llvm::outs() << "Valid operation:\n" << funcOp << "\n"; + funcOp.erase(); + } +}; +} // namespace + +namespace mlir { +void registerTestPrintInvalidPass() { + PassRegistration{}; +} +} // namespace mlir diff --git a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp --- a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp +++ b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp @@ -16,7 +16,7 @@ #include "mlir/IR/AffineMap.h" #include "mlir/IR/MLIRContext.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include "mlir/Support/FileUtilities.h" #include "mlir/Support/LLVM.h" #include "llvm/ADT/Optional.h" diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -18,7 +18,7 @@ #include "mlir/Pass/Pass.h" #include "mlir/Pass/PassManager.h" #include "mlir/Support/FileUtilities.h" -#include "mlir/Support/MlirOptMain.h" +#include "mlir/Tools/mlir-opt/MlirOptMain.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/InitLLVM.h" #include "llvm/Support/SourceMgr.h" @@ -45,6 +45,7 @@ void registerTestMatchers(); void registerTestOperationEqualPass(); void registerTestPrintDefUsePass(); +void registerTestPrintInvalidPass(); void registerTestPrintNestingPass(); void registerTestReducer(); void registerTestSpirvEntryPointABIPass(); @@ -132,6 +133,7 @@ registerTestMatchers(); registerTestOperationEqualPass(); registerTestPrintDefUsePass(); + registerTestPrintInvalidPass(); registerTestPrintNestingPass(); registerTestReducer(); registerTestSpirvEntryPointABIPass(); diff --git a/mlir/tools/mlir-spirv-cpu-runner/CMakeLists.txt b/mlir/tools/mlir-spirv-cpu-runner/CMakeLists.txt --- a/mlir/tools/mlir-spirv-cpu-runner/CMakeLists.txt +++ b/mlir/tools/mlir-spirv-cpu-runner/CMakeLists.txt @@ -29,7 +29,7 @@ MLIRSPIRV MLIRTargetLLVMIRExport MLIRTransforms - MLIRTranslation + 
MLIRTranslateLib MLIRSupport ) endif() diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -301,8 +301,8 @@ }; OperationFormat(const Operator &op) - : allOperands(false), allOperandTypes(false), allResultTypes(false), - infersResultTypes(false) { + + { operandTypes.resize(op.getNumOperands(), TypeResolution()); resultTypes.resize(op.getNumResults(), TypeResolution()); @@ -346,10 +346,10 @@ /// A flag indicating if all operand/result types were seen. If the format /// contains these, it can not contain individual type resolvers. - bool allOperands, allOperandTypes, allResultTypes; + bool allOperands = false, allOperandTypes = false, allResultTypes = false; /// A flag indicating if this operation infers its result types - bool infersResultTypes; + bool infersResultTypes = false; /// A flag indicating if this operation has the SingleBlockImplicitTerminator /// trait. @@ -2851,7 +2851,7 @@ if (failed(lelement)) return failure(); literalElements.push_back(*lelement); - parsingElements.push_back(std::vector()); + parsingElements.emplace_back(); std::vector &currParsingElements = parsingElements.back(); while (peekToken().getKind() != FormatToken::pipe && peekToken().getKind() != FormatToken::r_paren) { diff --git a/mlir/tools/mlir-tblgen/RewriterGen.cpp b/mlir/tools/mlir-tblgen/RewriterGen.cpp --- a/mlir/tools/mlir-tblgen/RewriterGen.cpp +++ b/mlir/tools/mlir-tblgen/RewriterGen.cpp @@ -243,7 +243,7 @@ StaticMatcherHelper &staticMatcherHelper; // The next unused ID for newly created values. 
- unsigned nextValueId; + unsigned nextValueId = 0; raw_indented_ostream os; @@ -333,8 +333,7 @@ PatternEmitter::PatternEmitter(Record *pat, RecordOperatorMap *mapper, raw_ostream &os, StaticMatcherHelper &helper) : loc(pat->getLoc()), opMap(mapper), pattern(pat, mapper), - symbolInfoMap(pat->getLoc()), staticMatcherHelper(helper), nextValueId(0), - os(os) { + symbolInfoMap(pat->getLoc()), staticMatcherHelper(helper), os(os) { fmtCtx.withBuilder("rewriter"); } diff --git a/mlir/tools/mlir-translate/CMakeLists.txt b/mlir/tools/mlir-translate/CMakeLists.txt --- a/mlir/tools/mlir-translate/CMakeLists.txt +++ b/mlir/tools/mlir-translate/CMakeLists.txt @@ -18,7 +18,7 @@ MLIRParser MLIRPass MLIRSPIRV - MLIRTranslation + MLIRTranslateLib MLIRSupport ) diff --git a/mlir/tools/mlir-translate/mlir-translate.cpp b/mlir/tools/mlir-translate/mlir-translate.cpp --- a/mlir/tools/mlir-translate/mlir-translate.cpp +++ b/mlir/tools/mlir-translate/mlir-translate.cpp @@ -13,7 +13,7 @@ #include "mlir/InitAllTranslations.h" #include "mlir/Support/LogicalResult.h" -#include "mlir/Translation.h" +#include "mlir/Tools/mlir-translate/MlirTranslateMain.h" using namespace mlir; diff --git a/mlir/tools/mlir-vulkan-runner/CMakeLists.txt b/mlir/tools/mlir-vulkan-runner/CMakeLists.txt --- a/mlir/tools/mlir-vulkan-runner/CMakeLists.txt +++ b/mlir/tools/mlir-vulkan-runner/CMakeLists.txt @@ -73,7 +73,7 @@ MLIRSupport MLIRTargetLLVMIRExport MLIRTransforms - MLIRTranslation + MLIRTranslateLib ${Vulkan_LIBRARY} ) diff --git a/mlir/unittests/Dialect/Affine/Analysis/AffineStructuresParser.cpp b/mlir/unittests/Dialect/Affine/Analysis/AffineStructuresParser.cpp --- a/mlir/unittests/Dialect/Affine/Analysis/AffineStructuresParser.cpp +++ b/mlir/unittests/Dialect/Affine/Analysis/AffineStructuresParser.cpp @@ -8,7 +8,7 @@ #include "./AffineStructuresParser.h" #include "mlir/IR/IntegerSet.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" using namespace mlir; diff --git 
a/mlir/unittests/ExecutionEngine/Invoke.cpp b/mlir/unittests/ExecutionEngine/Invoke.cpp --- a/mlir/unittests/ExecutionEngine/Invoke.cpp +++ b/mlir/unittests/ExecutionEngine/Invoke.cpp @@ -20,7 +20,7 @@ #include "mlir/ExecutionEngine/RunnerUtils.h" #include "mlir/IR/MLIRContext.h" #include "mlir/InitAllDialects.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include "mlir/Pass/PassManager.h" #include "mlir/Target/LLVMIR/Dialect/LLVMIR/LLVMToLLVMIRTranslation.h" #include "mlir/Target/LLVMIR/Export.h" diff --git a/mlir/unittests/Interfaces/ControlFlowInterfacesTest.cpp b/mlir/unittests/Interfaces/ControlFlowInterfacesTest.cpp --- a/mlir/unittests/Interfaces/ControlFlowInterfacesTest.cpp +++ b/mlir/unittests/Interfaces/ControlFlowInterfacesTest.cpp @@ -12,7 +12,7 @@ #include "mlir/IR/DialectImplementation.h" #include "mlir/IR/OpDefinition.h" #include "mlir/IR/OpImplementation.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include diff --git a/mlir/unittests/Interfaces/DataLayoutInterfacesTest.cpp b/mlir/unittests/Interfaces/DataLayoutInterfacesTest.cpp --- a/mlir/unittests/Interfaces/DataLayoutInterfacesTest.cpp +++ b/mlir/unittests/Interfaces/DataLayoutInterfacesTest.cpp @@ -14,7 +14,7 @@ #include "mlir/IR/DialectImplementation.h" #include "mlir/IR/OpDefinition.h" #include "mlir/IR/OpImplementation.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include diff --git a/mlir/unittests/Interfaces/InferTypeOpInterfaceTest.cpp b/mlir/unittests/Interfaces/InferTypeOpInterfaceTest.cpp --- a/mlir/unittests/Interfaces/InferTypeOpInterfaceTest.cpp +++ b/mlir/unittests/Interfaces/InferTypeOpInterfaceTest.cpp @@ -16,7 +16,7 @@ #include "mlir/IR/ImplicitLocOpBuilder.h" #include "mlir/IR/OpDefinition.h" #include "mlir/IR/OpImplementation.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include diff --git a/mlir/unittests/Transforms/Canonicalizer.cpp b/mlir/unittests/Transforms/Canonicalizer.cpp --- 
a/mlir/unittests/Transforms/Canonicalizer.cpp +++ b/mlir/unittests/Transforms/Canonicalizer.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// #include "mlir/IR/PatternMatch.h" -#include "mlir/Parser.h" +#include "mlir/Parser/Parser.h" #include "mlir/Pass/PassManager.h" #include "mlir/Transforms/GreedyPatternRewriteDriver.h" #include "mlir/Transforms/Passes.h" diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -1483,11 +1483,9 @@ includes = ["include"], deps = [ ":ArmSVEIncGen", - ":FuncDialect", ":IR", ":LLVMDialect", ":SideEffectInterfaces", - ":VectorOps", "//llvm:Core", "//llvm:Support", ], @@ -1504,7 +1502,6 @@ ":IR", ":LLVMCommonConversion", ":LLVMDialect", - ":Pass", ":TransformUtils", "//llvm:Core", "//llvm:Support", @@ -1792,7 +1789,6 @@ deps = [ ":Affine", ":AffineAnalysis", - ":Analysis", ":ArithmeticDialect", ":BufferizationDialect", ":BufferizationTransforms", @@ -1937,7 +1933,6 @@ ":ArithmeticDialect", ":IR", ":LinalgOps", - ":SparseTensor", "//llvm:Support", ], ) @@ -1967,7 +1962,6 @@ ":Pass", ":SCFDialect", ":SCFTransforms", - ":SCFUtils", ":SparseTensor", ":SparseTensorPassIncGen", ":SparseTensorUtils", @@ -2103,7 +2097,6 @@ ":LoopLikeInterface", ":MemRefDialect", ":SideEffectInterfaces", - ":Support", ":TensorDialect", "//llvm:Support", ], @@ -2119,7 +2112,6 @@ ]), includes = ["include"], deps = [ - ":Dialect", ":EmitCAttributesIncGen", ":EmitCOpsIncGen", ":IR", @@ -2140,12 +2132,9 @@ deps = [ ":AsyncOpsIncGen", ":ControlFlowInterfaces", - ":Dialect", - ":FuncDialect", ":IR", ":InferTypeOpInterface", ":SideEffectInterfaces", - ":Support", "//llvm:Support", ], ) @@ -2713,7 +2702,10 @@ "lib/Dialect/Shape/Transforms/*.cpp", "lib/Dialect/Shape/Transforms/*.h", ]), - hdrs = ["include/mlir/Dialect/Shape/Transforms/Passes.h"], + 
hdrs = [ + "include/mlir/Dialect/Shape/Transforms/BufferizableOpInterfaceImpl.h", + "include/mlir/Dialect/Shape/Transforms/Passes.h", + ], includes = ["include"], deps = [ ":ArithmeticDialect", @@ -2793,13 +2785,10 @@ includes = ["include"], deps = [ ":ArithmeticDialect", - ":CallOpInterfaces", - ":CastOpInterfaces", ":CommonFolders", ":ControlFlowInterfaces", ":ControlFlowOpsIncGen", ":IR", - ":InferTypeOpInterface", ":SideEffectInterfaces", ":Support", "//llvm:Support", @@ -2822,7 +2811,6 @@ includes = ["include"], deps = [ ":ArithmeticDialect", - ":ArithmeticUtils", ":CallOpInterfaces", ":CastOpInterfaces", ":CommonFolders", @@ -2833,7 +2821,6 @@ ":InferTypeOpInterface", ":SideEffectInterfaces", ":Support", - ":VectorInterfaces", "//llvm:Support", ], ) @@ -2878,19 +2865,14 @@ hdrs = glob(["include/mlir/Dialect/Func/Transforms/*.h"]), includes = ["include"], deps = [ - ":Affine", - ":ArithmeticDialect", - ":ArithmeticTransforms", ":BufferizationDialect", ":BufferizationTransforms", ":FuncDialect", ":FuncTransformsPassIncGen", ":IR", - ":MemRefDialect", # TODO: Remove dependency on MemRef dialect ":Pass", ":SCFDialect", ":Support", - ":TensorDialect", ":Transforms", "//llvm:Support", ], @@ -2908,8 +2890,6 @@ ]), includes = ["include"], deps = [ - ":Affine", - ":AffineAnalysis", ":ArithmeticDialect", ":ArithmeticUtils", ":DialectUtils", @@ -2956,19 +2936,15 @@ includes = ["include"], deps = [ ":Affine", - ":AffineAnalysis", - ":Analysis", ":ArithmeticDialect", ":BufferizationDialect", ":BufferizationTransforms", ":DialectUtils", - ":FuncDialect", ":IR", ":LinalgOps", ":MemRefDialect", ":Pass", ":SCFDialect", - ":Support", ":TensorDialect", ":Transforms", ":VectorInterfaces", @@ -2994,13 +2970,11 @@ ":Affine", ":AffineAnalysis", ":ArithmeticDialect", - ":DialectUtils", ":FuncDialect", ":IR", ":MemRefDialect", ":Support", ":TensorDialect", - ":VectorInterfaces", ":VectorOps", "//llvm:Support", ], @@ -3008,20 +2982,11 @@ cc_library( name = "Support", - srcs = glob( - 
[ - "lib/Support/*.cpp", - "lib/Support/*.h", - ], - exclude = [ - # TODO(jpienaar): Move this out, else Support depends on Analysis/ - "lib/Support/MlirOptMain.cpp", - ], - ), - hdrs = glob( - ["include/mlir/Support/*.h"], - exclude = ["include/mlir/Support/MlirOptMain.h"], - ), + srcs = glob([ + "lib/Support/*.cpp", + "lib/Support/*.h", + ]), + hdrs = glob(["include/mlir/Support/*.h"]), includes = ["include"], deps = ["//llvm:Support"], ) @@ -3064,9 +3029,7 @@ ]), hdrs = glob([ "include/mlir/Parser/*.h", - ]) + [ - "include/mlir/Parser.h", - ], + ]), includes = ["include"], deps = [ ":IR", @@ -3794,12 +3757,10 @@ hdrs = ["include/mlir/Dialect/LLVMIR/NVVMDialect.h"], includes = ["include"], deps = [ - ":FuncDialect", ":IR", ":LLVMDialect", ":NVVMOpsIncGen", ":SideEffectInterfaces", - ":Support", "//llvm:AsmParser", "//llvm:Core", "//llvm:Support", @@ -3891,12 +3852,10 @@ hdrs = ["include/mlir/Dialect/LLVMIR/ROCDLDialect.h"], includes = ["include"], deps = [ - ":FuncDialect", ":IR", ":LLVMDialect", ":ROCDLOpsIncGen", ":SideEffectInterfaces", - ":Support", "//llvm:AsmParser", "//llvm:Core", "//llvm:Support", @@ -3976,7 +3935,6 @@ ":PDLOpsIncGen", ":PDLTypesIncGen", ":SideEffects", - ":Support", "//llvm:Support", ], ) @@ -4054,7 +4012,6 @@ ":PDLDialect", ":PDLInterpOpsIncGen", ":SideEffects", - ":Support", "//llvm:Support", ], ) @@ -4263,7 +4220,6 @@ ":IR", ":InferTypeOpInterface", ":Parser", - ":Pass", ":SPIRVAttrUtilsGen", ":SPIRVAvailabilityIncGen", ":SPIRVCanonicalizationIncGen", @@ -4305,7 +4261,6 @@ includes = ["include"], deps = [ ":SPIRVDialect", - ":Support", "//llvm:Support", ], ) @@ -4317,7 +4272,6 @@ includes = ["include"], deps = [ ":SPIRVDialect", - ":Support", ":TransformUtils", "//llvm:Support", ], @@ -4344,7 +4298,6 @@ ":SPIRVDialect", ":SPIRVPassIncGen", ":SPIRVUtils", - ":Support", ":Transforms", "//llvm:Support", ], @@ -4355,9 +4308,7 @@ hdrs = ["lib/Conversion/SPIRVCommon/Pattern.h"], includes = ["include"], deps = [ - ":IR", ":SPIRVDialect", - 
":Support", ":Transforms", ], ) @@ -4461,7 +4412,6 @@ hdrs = ["include/mlir/Target/SPIRV/SPIRVBinaryUtils.h"], includes = ["include"], deps = [ - ":IR", ":SPIRVAttrUtilsGen", ":SPIRVDialect", ":SPIRVOpsIncGen", @@ -4614,7 +4564,6 @@ ":IR", ":InferTypeOpInterface", ":SideEffectInterfaces", - ":Support", ":TensorOpsIncGen", ":TilingInterface", ":ViewLikeInterface", @@ -4662,8 +4611,6 @@ deps = [ ":Affine", ":ArithmeticDialect", - ":IR", - ":Support", ":TensorDialect", "//llvm:Support", ], @@ -4702,7 +4649,6 @@ includes = ["include"], deps = [ ":ArithmeticDialect", - ":Async", ":BufferizationDialect", ":BufferizationTransforms", ":DialectUtils", @@ -4712,7 +4658,6 @@ ":ParallelLoopMapperAttrGen", ":Pass", ":SCFDialect", - ":Support", ":TensorDialect", ":TensorPassIncGen", ":Transforms", @@ -4790,7 +4735,6 @@ deps = [ ":DerivedAttributeOpInterfaceIncGen", ":IR", - ":Support", "//llvm:Support", ], ) @@ -4951,7 +4895,6 @@ deps = [ ":Analysis", ":ControlFlowInterfaces", - ":CopyOpInterface", ":IR", ":LoopLikeInterface", ":Pass", @@ -5360,7 +5303,6 @@ deps = [ ":CallOpInterfacesIncGen", ":IR", - ":Support", "//llvm:Support", ], ) @@ -5391,7 +5333,6 @@ deps = [ ":CastOpInterfacesIncGen", ":IR", - ":Support", "//llvm:Support", ], ) @@ -5422,7 +5363,6 @@ deps = [ ":ControlFlowInterfacesIncGen", ":IR", - ":Support", "//llvm:Support", ], ) @@ -5484,7 +5424,6 @@ deps = [ ":IR", ":SideEffectInterfacesIncGen", - ":Support", "//llvm:Support", ], ) @@ -5548,11 +5487,8 @@ cc_library( name = "Translation", - srcs = glob([ - "lib/Translation/*.cpp", - "lib/Translation/*.h", - ]), - hdrs = ["include/mlir/Translation.h"], + srcs = glob(["lib/Tools/mlir-translate/*.cpp"]), + hdrs = glob(["include/mlir/Tools/mlir-translate/*.h"]), includes = ["include"], deps = [ ":IR", @@ -5601,7 +5537,6 @@ ":AMX", ":AMXConversionIncGen", ":IR", - ":Support", ":ToLLVMIRTranslation", "//llvm:Core", "//llvm:Support", @@ -5615,7 +5550,6 @@ includes = ["include"], deps = [ ":IR", - ":Support", 
":ToLLVMIRTranslation", ":X86Vector", ":X86VectorConversionIncGen", @@ -5634,7 +5568,6 @@ ":ArmNeonConversionIncGen", ":ArmNeonIncGen", ":IR", - ":Support", ":ToLLVMIRTranslation", "//llvm:Core", "//llvm:Support", @@ -5650,7 +5583,6 @@ ":ArmSVE", ":ArmSVEConversionIncGen", ":IR", - ":Support", ":ToLLVMIRTranslation", "//llvm:Core", "//llvm:Support", @@ -5666,7 +5598,6 @@ ":IR", ":NVVMConversionIncGen", ":NVVMDialect", - ":Support", ":ToLLVMIRTranslation", "//llvm:Core", "//llvm:Support", @@ -5682,7 +5613,6 @@ ":IR", ":ROCDLConversionIncGen", ":ROCDLDialect", - ":Support", ":ToLLVMIRTranslation", "//llvm:Core", "//llvm:Support", @@ -5812,9 +5742,7 @@ ":LLVMDialect", ":Support", ":ToLLVMIRTranslation", - ":Translation", "//llvm:AllTargetsAsmParsers", - "//llvm:BitReader", "//llvm:BitWriter", "//llvm:Core", "//llvm:ExecutionEngine", @@ -5846,8 +5774,8 @@ cc_library( name = "MlirOptLib", - srcs = ["lib/Support/MlirOptMain.cpp"], - hdrs = ["include/mlir/Support/MlirOptMain.h"], + srcs = ["lib/Tools/mlir-opt/MlirOptMain.cpp"], + hdrs = ["include/mlir/Tools/mlir-opt/MlirOptMain.h"], includes = ["include"], deps = [ ":IR", @@ -5875,8 +5803,6 @@ deps = [ ":AllPassesAndDialects", ":AllTranslations", - ":IR", - ":Parser", ":Support", ":Translation", "//llvm:Support", @@ -6067,7 +5993,6 @@ ":OpenACCToLLVMIRTranslation", ":OpenMPToLLVMIRTranslation", ":Parser", - ":Pass", ":SCFToStandard", ":Support", "//llvm:Core", @@ -6237,7 +6162,6 @@ ":GPUDialect", ":GPUToSPIRV", ":GPUTransforms", - ":IR", ":LLVMDialect", ":LLVMToLLVMIRTranslation", ":MemRefDialect", @@ -6272,7 +6196,6 @@ srcs = ["tools/mlir-tblgen/mlir-tblgen.cpp"], includes = ["include"], deps = [ - ":Support", ":TableGen", "//llvm:Support", "//llvm:TableGen", @@ -7129,7 +7052,6 @@ ":FuncDialect", ":FuncTransforms", ":IR", - ":InferTypeOpInterface", ":LinalgOps", ":LinalgPassIncGen", ":LinalgStructuredOpsIncGen", @@ -7169,11 +7091,9 @@ deps = [ ":BufferizationDialect", ":BufferizationTransforms", - ":DialectUtils", 
":FuncDialect", ":IR", ":MemRefDialect", - ":Support", "//llvm:Support", ], ) @@ -7620,8 +7540,6 @@ ":IR", ":InferTypeOpInterface", ":SideEffectInterfaces", - ":Support", - ":VectorInterfaces", "//llvm:Support", ], ) @@ -7788,7 +7706,6 @@ ":IR", ":InferTypeOpInterface", ":SideEffectInterfaces", - ":Support", ":VectorInterfaces", "//llvm:Support", ], @@ -7831,7 +7748,6 @@ ":IR", ":MemRefDialect", ":Pass", - ":Support", ":TransformUtils", ":Transforms", "//llvm:Support", @@ -7928,7 +7844,6 @@ ":MathBaseIncGen", ":MathOpsIncGen", ":SideEffectInterfaces", - ":Support", ":VectorInterfaces", "//llvm:Support", ], @@ -7948,9 +7863,6 @@ ":FuncDialect", ":IR", ":MathDialect", - ":Pass", - ":SCFDialect", - ":Support", ":Transforms", ":VectorOps", ":VectorUtils", @@ -8115,7 +8027,6 @@ ":MemRefDialect", ":MemRefPassIncGen", ":Pass", - ":Support", ":TensorDialect", ":Transforms", ":VectorOps", @@ -8236,15 +8147,11 @@ ":BufferizableOpInterfaceIncGen", ":BufferizationBaseIncGen", ":BufferizationOpsIncGen", - ":ControlFlowInterfaces", - ":CopyOpInterface", ":FuncDialect", ":IR", - ":InferTypeOpInterface", ":MemRefDialect", ":Support", ":TensorDialect", - ":ViewLikeInterface", "//llvm:Support", ], ) @@ -8283,14 +8190,11 @@ ":BufferizationDialect", ":BufferizationPassIncGen", ":ControlFlowInterfaces", - ":DialectUtils", ":FuncDialect", ":IR", - ":InferTypeOpInterface", ":LoopLikeInterface", ":MemRefDialect", ":Pass", - ":Support", ":Transforms", "//llvm:Support", ], @@ -8428,7 +8332,6 @@ ":AllPassesAndDialects", ":IR", ":MlirReduceLib", - ":Pass", "//llvm:Support", "//mlir/test:TestDialect", ],