diff --git a/clang-tools-extra/clang-include-fixer/find-all-symbols/STLPostfixHeaderMap.cpp b/clang-tools-extra/clang-include-fixer/find-all-symbols/STLPostfixHeaderMap.cpp --- a/clang-tools-extra/clang-include-fixer/find-all-symbols/STLPostfixHeaderMap.cpp +++ b/clang-tools-extra/clang-include-fixer/find-all-symbols/STLPostfixHeaderMap.cpp @@ -57,6 +57,16 @@ {"include/prfchwintrin.h$", ""}, {"include/rdseedintrin.h$", ""}, {"include/rtmintrin.h$", ""}, + {"include/rv32bintrin-builtins.h$", ""}, + {"include/rv32bintrin-emulation.h$", ""}, + {"include/rv32bintrin.h$", ""}, + {"include/rv64bintrin-asm.h$", ""}, + {"include/rv64bintrin-builtins.h$", ""}, + {"include/rv64bintrin-emulation.h$", ""}, + {"include/rv64bintrin.h$", ""}, + {"include/rvbintrin-asm.h$", ""}, + {"include/rvbintrin-emulation.h$", ""}, + {"include/rvintrin.h$", ""}, {"include/shaintrin.h$", ""}, {"include/smmintrin.h$", ""}, {"include/stdalign.h$", ""}, diff --git a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp --- a/clang-tools-extra/clangd/index/CanonicalIncludes.cpp +++ b/clang-tools-extra/clangd/index/CanonicalIncludes.cpp @@ -150,6 +150,16 @@ {"include/prfchwintrin.h", ""}, {"include/rdseedintrin.h", ""}, {"include/rtmintrin.h", ""}, + {"include/rv32bintrin-builtins.h", ""}, + {"include/rv32bintrin-emulation.h", ""}, + {"include/rv32bintrin.h", ""}, + {"include/rv64bintrin-asm.h", ""}, + {"include/rv64bintrin-builtins.h", ""}, + {"include/rv64bintrin-emulation.h", ""}, + {"include/rv64bintrin.h", ""}, + {"include/rvbintrin-asm.h", ""}, + {"include/rvbintrin-emulation.h", ""}, + {"include/rvintrin.h", ""}, {"include/shaintrin.h", ""}, {"include/smmintrin.h", ""}, {"include/stdalign.h", ""}, diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt --- a/clang/lib/Headers/CMakeLists.txt +++ b/clang/lib/Headers/CMakeLists.txt @@ -86,6 +86,16 @@ ptwriteintrin.h rdseedintrin.h rtmintrin.h + rv32bintrin-builtins.h 
+ rv32bintrin-emulation.h + rv32bintrin.h + rv64bintrin-asm.h + rv64bintrin-builtins.h + rv64bintrin-emulation.h + rv64bintrin.h + rvbintrin-asm.h + rvbintrin-emulation.h + rvintrin.h sgxintrin.h s390intrin.h shaintrin.h diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h --- a/clang/lib/Headers/intrin.h +++ b/clang/lib/Headers/intrin.h @@ -28,6 +28,10 @@ #include #endif +#if defined(__riscv) +#include <rvintrin.h> +#endif + /* For the definition of jmp_buf. */ #if __STDC_HOSTED__ #include diff --git a/clang/lib/Headers/rv32bintrin-builtins.h b/clang/lib/Headers/rv32bintrin-builtins.h new file mode 100644 --- /dev/null +++ b/clang/lib/Headers/rv32bintrin-builtins.h @@ -0,0 +1,65 @@ +/* ===-------- rv32bintrin-builtins.h --------------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __RV32BINTRIN_BUILTINS_H +#define __RV32BINTRIN_BUILTINS_H + +#ifndef __RVINTRIN_H +#error \ + "Never use <rv32bintrin-builtins.h> directly; include <rvintrin.h> instead." +#endif + +// These are available regardless of whether the Bitmanip extension is +// enabled or not. + +// Our approach to these functions is to use inline asm when the Bitmanip +// extension is enabled and use replacement C code when it's disabled. However, +// where LLVM builtins are already available we are making use of them. + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_clz(const uint_xlen_t rs1) { + // Calling these builtins with 0 results in undefined behaviour.
+ if (rs1 == 0) { + return XLEN; + } + return __builtin_clz(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_ctz(const uint_xlen_t rs1) { + if (rs1 == 0) { + return XLEN; + } + return __builtin_ctz(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_pcnt(const uint_xlen_t rs1) { + return __builtin_popcount(rs1); +} + +// Generic aliases +// e.g. _rv_* is an alias of _rv32_* + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_clz(const uint_xlen_t rs1) { + return _rv32_clz(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_ctz(const uint_xlen_t rs1) { + return _rv32_ctz(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_pcnt(const uint_xlen_t rs1) { + return _rv32_pcnt(rs1); +} + +#endif //__RV32BINTRIN_BUILTINS_H diff --git a/clang/lib/Headers/rv32bintrin-emulation.h b/clang/lib/Headers/rv32bintrin-emulation.h new file mode 100644 --- /dev/null +++ b/clang/lib/Headers/rv32bintrin-emulation.h @@ -0,0 +1,106 @@ +/* ===-------- rv32bintrin-emulation.h -------------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __RV32BINTRIN_EMULATION_H +#define __RV32BINTRIN_EMULATION_H + +#ifndef __RVINTRIN_H +#error \ + "Never use <rv32bintrin-emulation.h> directly; include <rvintrin.h> instead." +#endif + +#include <stdint.h> + +#include "rvbintrin-emulation.h" + +// If the builtins are unavailable then these provide the same functionality.
+ +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS _rv_gorc(const uint32_t rs1, + const uint32_t rs2) { + uint32_t x = rs1; + const uint_xlen_t shamt = rs2 & 31; + if (shamt & 1) { + x |= ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1); + } + if (shamt & 2) { + x |= ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2); + } + if (shamt & 4) { + x |= ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4); + } + if (shamt & 8) { + x |= ((x & 0x00FF00FF) << 8) | ((x & 0xFF00FF00) >> 8); + } + if (shamt & 16) { + x |= ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >> 16); + } + return x; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS _rv_grev(const uint32_t rs1, + const uint32_t rs2) { + uint32_t x = rs1; + const uint_xlen_t shamt = rs2 & 31; + if (shamt & 1) { + x = ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1); + } + if (shamt & 2) { + x = ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2); + } + if (shamt & 4) { + x = ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4); + } + if (shamt & 8) { + x = ((x & 0x00FF00FF) << 8) | ((x & 0xFF00FF00) >> 8); + } + if (shamt & 16) { + x = ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >> 16); + } + return x; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS _rv_shfl(const uint32_t rs1, + const uint32_t rs2) { + uint32_t x = rs1; + const uint_xlen_t shamt = rs2 & 15; + if (shamt & 8) { + x = shuffle_stage(x, 0x00ff0000, 0x0000ff00, 8); + } + if (shamt & 4) { + x = shuffle_stage(x, 0x0f000f00, 0x00f000f0, 4); + } + if (shamt & 2) { + x = shuffle_stage(x, 0x30303030, 0x0c0c0c0c, 2); + } + if (shamt & 1) { + x = shuffle_stage(x, 0x44444444, 0x22222222, 1); + } + return x; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unshfl(const uint32_t rs1, const uint32_t rs2) { + uint32_t x = rs1; + const uint_xlen_t shamt = rs2 & 15; + if (shamt & 1) { + x = shuffle_stage(x, 0x44444444, 0x22222222, 1); + } + if (shamt & 2) { + x = shuffle_stage(x, 0x30303030, 0x0c0c0c0c, 2); + } + if (shamt & 4) { + x = shuffle_stage(x, 
0x0f000f00, 0x00f000f0, 4); + } + if (shamt & 8) { + x = shuffle_stage(x, 0x00ff0000, 0x0000ff00, 8); + } + return x; +} + +#endif //__RV32BINTRIN_EMULATION_H diff --git a/clang/lib/Headers/rv32bintrin.h b/clang/lib/Headers/rv32bintrin.h new file mode 100644 --- /dev/null +++ b/clang/lib/Headers/rv32bintrin.h @@ -0,0 +1,748 @@ +/* ===-------- rv32bintrin.h -----------------------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __RV32BINTRIN_H +#define __RV32BINTRIN_H + +#ifndef __RVINTRIN_H +#error "Never use <rv32bintrin.h> directly; include <rvintrin.h> instead." +#endif + +#include "rv32bintrin-builtins.h" + +#if defined(__riscv_bitmanip) + +#include "rvbintrin-asm.h" + +#else // Bitmanip extension is disabled. + +#include "rv32bintrin-emulation.h" + +#endif // defined(__riscv_bitmanip) + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_andn(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_andn(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_fsl(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) { + return _rv_fsl(rs1, rs2, rs3); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_fsr(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) { + return _rv_fsr(rs1, rs2, rs3); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_max(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_max(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_maxu(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_maxu(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_min(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_min(rs1, rs2); +} + +static __inline__
uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_minu(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_minu(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orn(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_orn(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rol(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_rol(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_ror(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_ror(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_bdep(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_bdep(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_bext(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_bext(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_bfp(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_bfp(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_clmul(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_clmul(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_clmulh(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_clmulh(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_clmulr(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_clmulr(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS _rv32_cmix( + const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) { + return _rv_cmix(rs1, rs2, rs3); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS _rv32_cmov( + const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) { + return _rv_cmov(rs1, rs2, rs3); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_crc32_b(const uint_xlen_t rs1) { + return _rv_crc32_b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_crc32_h(const uint_xlen_t rs1) { + return 
_rv_crc32_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_crc32_w(const uint_xlen_t rs1) { + return _rv_crc32_w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_crc32c_b(const uint_xlen_t rs1) { + return _rv_crc32c_b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_crc32c_h(const uint_xlen_t rs1) { + return _rv_crc32c_h(rs1); /* halfword variant, mirroring _rv32_crc32_h */ +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_crc32c_w(const uint_xlen_t rs1) { + return _rv_crc32c_w(rs1); /* word variant, mirroring _rv32_crc32_w */ +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_gorc(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_gorc(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_grev(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_grev(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_pack(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_pack(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_packu(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_packu(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_packh(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_packh(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_sext_b(const uint_xlen_t rs1) { + return _rv_sext_b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_sext_h(const uint_xlen_t rs1) { + return _rv_sext_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_sbclr(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_sbclr(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_sbext(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_sbext(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_sbinv(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_sbinv(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_sbset(const uint_xlen_t
rs1, const uint_xlen_t rs2) { + return _rv_sbset(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_shfl(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_shfl(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_slo(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_slo(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_sro(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_sro(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_unshfl(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_unshfl(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_xnor(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_xnor(rs1, rs2); +} + +// Pseudo Instructions + +// REV (GREVI) pseudo instructions + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev_p(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b00001); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev2_n(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b00010); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev_n(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b00011); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev4_b(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b00100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev2_b(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b00110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev_b(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b00111); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev8_h(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b01000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev4_h(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b01100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev2_h(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 
0b01110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev_h(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b01111); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev16(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b10000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev8(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b11000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev4(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b11100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev2(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b11110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b11111); +} + +// ORC (GORCI) pseudo instructions + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc_p(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b00001); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc2_n(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b00010); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc_n(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b00011); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc4_b(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b00100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc2_b(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b00110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc_b(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b00111); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc8_h(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b01000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc4_h(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b01100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc2_h(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b01110); +} + +static __inline__ 
uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc_h(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b01111); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc16(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b10000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc8(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b11000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc4(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b11100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc2(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b11110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b11111); +} + +// ZIP (SHFLI) pseudo instructions + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_zip_n(const uint_xlen_t rs1) { + return _rv32_shfl(rs1, 0b0001); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_zip2_b(const uint_xlen_t rs1) { + return _rv32_shfl(rs1, 0b0010); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_zip_b(const uint_xlen_t rs1) { + return _rv32_shfl(rs1, 0b0011); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_zip4_h(const uint_xlen_t rs1) { + return _rv32_shfl(rs1, 0b0100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_zip2_h(const uint_xlen_t rs1) { + return _rv32_shfl(rs1, 0b0110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_zip_h(const uint_xlen_t rs1) { + return _rv32_shfl(rs1, 0b0111); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_zip8(const uint_xlen_t rs1) { + return _rv32_shfl(rs1, 0b1000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_zip4(const uint_xlen_t rs1) { + return _rv32_shfl(rs1, 0b1100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_zip2(const uint_xlen_t rs1) { + return _rv32_shfl(rs1, 0b1110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_zip(const 
uint_xlen_t rs1) { + return _rv32_shfl(rs1, 0b1111); +} + +// UNZIP (UNSHFL) pseudo instructions + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_unzip_n(const uint_xlen_t rs1) { + return _rv32_unshfl(rs1, 0b0001); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_unzip2_b(const uint_xlen_t rs1) { + return _rv32_unshfl(rs1, 0b0010); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_unzip_b(const uint_xlen_t rs1) { + return _rv32_unshfl(rs1, 0b0011); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_unzip4_h(const uint_xlen_t rs1) { + return _rv32_unshfl(rs1, 0b0100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_unzip2_h(const uint_xlen_t rs1) { + return _rv32_unshfl(rs1, 0b0110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_unzip_h(const uint_xlen_t rs1) { + return _rv32_unshfl(rs1, 0b0111); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_unzip8(const uint_xlen_t rs1) { + return _rv32_unshfl(rs1, 0b1000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_unzip4(const uint_xlen_t rs1) { + return _rv32_unshfl(rs1, 0b1100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_unzip2(const uint_xlen_t rs1) { + return _rv32_unshfl(rs1, 0b1110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_unzip(const uint_xlen_t rs1) { + return _rv32_unshfl(rs1, 0b1111); +} + +// Generic aliases for 32 bit pseudo instructions +// e.g. 
_rv_* is an alias of _rv32_* + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev_p(const uint_xlen_t rs1) { + return _rv32_rev_p(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev2_n(const uint_xlen_t rs1) { + return _rv32_rev2_n(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev_n(const uint_xlen_t rs1) { + return _rv32_rev_n(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev4_b(const uint_xlen_t rs1) { + return _rv32_rev4_b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev2_b(const uint_xlen_t rs1) { + return _rv32_rev2_b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev_b(const uint_xlen_t rs1) { + return _rv32_rev_b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev8_h(const uint_xlen_t rs1) { + return _rv32_rev8_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev4_h(const uint_xlen_t rs1) { + return _rv32_rev4_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev2_h(const uint_xlen_t rs1) { + return _rv32_rev2_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev_h(const uint_xlen_t rs1) { + return _rv32_rev_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev16(const uint_xlen_t rs1) { + return _rv32_rev16(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev8(const uint_xlen_t rs1) { + return _rv32_rev8(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev4(const uint_xlen_t rs1) { + return _rv32_rev4(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev2(const uint_xlen_t rs1) { + return _rv32_rev2(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev(const uint_xlen_t rs1) { + return _rv32_rev(rs1); +} + +// ORC (GORCI) pseudo instructions + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc_p(const uint_xlen_t rs1) { + return _rv32_orc_p(rs1); +} + +static __inline__ uint_xlen_t 
__DEFAULT_FN_ATTRS +_rv_orc2_n(const uint_xlen_t rs1) { + return _rv32_orc2_n(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc_n(const uint_xlen_t rs1) { + return _rv32_orc_n(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc4_b(const uint_xlen_t rs1) { + return _rv32_orc4_b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc2_b(const uint_xlen_t rs1) { + return _rv32_orc2_b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc_b(const uint_xlen_t rs1) { + return _rv32_orc_b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc8_h(const uint_xlen_t rs1) { + return _rv32_orc8_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc4_h(const uint_xlen_t rs1) { + return _rv32_orc4_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc2_h(const uint_xlen_t rs1) { + return _rv32_orc2_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc_h(const uint_xlen_t rs1) { + return _rv32_orc_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc16(const uint_xlen_t rs1) { + return _rv32_orc16(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc8(const uint_xlen_t rs1) { + return _rv32_orc8(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc4(const uint_xlen_t rs1) { + return _rv32_orc4(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc2(const uint_xlen_t rs1) { + return _rv32_orc2(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc(const uint_xlen_t rs1) { + return _rv32_orc(rs1); +} + +// ZIP (SHFLI) pseudo instructions + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip_n(const uint_xlen_t rs1) { + return _rv32_zip_n(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip2_b(const uint_xlen_t rs1) { + return _rv32_zip2_b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip_b(const uint_xlen_t rs1) { + return 
_rv32_zip_b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip4_h(const uint_xlen_t rs1) { + return _rv32_zip4_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip2_h(const uint_xlen_t rs1) { + return _rv32_zip2_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip_h(const uint_xlen_t rs1) { + return _rv32_zip_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip8(const uint_xlen_t rs1) { + return _rv32_zip8(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip4(const uint_xlen_t rs1) { + return _rv32_zip4(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip2(const uint_xlen_t rs1) { + return _rv32_zip2(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip(const uint_xlen_t rs1) { + return _rv32_zip(rs1); +} + +// UNZIP (UNSHFL) pseudo instructions + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip_n(const uint_xlen_t rs1) { + return _rv32_unzip_n(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip2_b(const uint_xlen_t rs1) { + return _rv32_unzip2_b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip_b(const uint_xlen_t rs1) { + return _rv32_unzip_b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip4_h(const uint_xlen_t rs1) { + return _rv32_unzip4_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip2_h(const uint_xlen_t rs1) { + return _rv32_unzip2_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip_h(const uint_xlen_t rs1) { + return _rv32_unzip_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip8(const uint_xlen_t rs1) { + return _rv32_unzip8(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip4(const uint_xlen_t rs1) { + return _rv32_unzip4(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip2(const uint_xlen_t rs1) { + return _rv32_unzip2(rs1); +} + +static 
__inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip(const uint_xlen_t rs1) { + return _rv32_unzip(rs1); +} + +#endif // __RV32BINTRIN_H diff --git a/clang/lib/Headers/rv64bintrin-asm.h b/clang/lib/Headers/rv64bintrin-asm.h new file mode 100644 --- /dev/null +++ b/clang/lib/Headers/rv64bintrin-asm.h @@ -0,0 +1,277 @@ +/* ===-------- rv64bintrin-asm.h -------------------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __RV64BINTRIN_ASM_H +#define __RV64BINTRIN_ASM_H + +#ifndef __RVINTRIN_H +#error "Never use <rv64bintrin-asm.h> directly; include <rvintrin.h> instead." +#endif + +#include <stdint.h> + +#include "rvbintrin-asm.h" + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_fslw(const uint32_t rs1, + const uint32_t rs2, + const uint32_t rs3) { + uint_xlen_t rd; + /* Always emit the register form, as _rv_fsrw does. An immediate form + * would need fsriw, which is a funnel shift *right*: using it for a left + * shift requires rewriting the shift amount, and the constness check must + * be made on the operand that is actually passed as the immediate. */ + __asm__("fslw %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs2), "r"(rs3)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_fsrw(const uint32_t rs1, + const uint32_t rs2, + const uint32_t rs3) { + uint_xlen_t rd; + __asm__("fsrw %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs2), "r"(rs3)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_rolw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("rolw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_rorw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("roriw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("rorw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static
__inline__ uint32_t __DEFAULT_FN_ATTRS _rv_addwu(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("addiwu %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("addwu %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_addu_w(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("addu.w %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_bdepw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("bdepw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_bfpw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("bfpw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_bextw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("bextw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_bmatflip(const uint32_t rs1) { /* NOTE(review): rd is uint_xlen_t but the argument and result are uint32_t; the mnemonic has no "w" suffix, so this presumably works on the full register - confirm the narrowing is intended (same for _rv_bmator/_rv_bmatxor). */ + uint_xlen_t rd; + __asm__("bmatflip %0, %1" : "=r"(rd) : "r"(rs1)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_bmator(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("bmator %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_bmatxor(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("bmatxor %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_clmulhw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("clmulhw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_clmulrw(const uint32_t rs1, + const uint32_t rs2) { + 
uint_xlen_t rd; + __asm__("clmulrw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_clmulw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("clmulw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_crc32_d(const uint32_t rs1) { /* NOTE(review): the ".d" mnemonic suggests a doubleword operand, yet rs1 is uint32_t - confirm the input is not truncated (same for _rv_crc32c_d). */ + uint_xlen_t rd; + __asm__("crc32.d %0, %1" : "=r"(rd) : "r"(rs1)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_crc32c_d(const uint32_t rs1) { + uint_xlen_t rd; + __asm__("crc32c.d %0, %1" : "=r"(rd) : "r"(rs1)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_gorcw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("gorciw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("gorcw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_grevw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("greviw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("grevw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_packw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("packw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_packuw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("packuw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_sbclrw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("sbclriw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("sbclrw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static 
__inline__ uint32_t __DEFAULT_FN_ATTRS _rv_sbextw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("sbextw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_sbinvw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("sbinviw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("sbinvw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_sbsetw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("sbsetiw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("sbsetw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_shflw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("shflw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_slow(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("sloiw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("slow %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_srow(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("sroiw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("srow %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_subu_w(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("subu.w %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_subwu(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("subwu %0, %1, %2" 
: "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_unshflw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("unshflw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +#endif //__RV64BINTRIN_ASM_H diff --git a/clang/lib/Headers/rv64bintrin-builtins.h b/clang/lib/Headers/rv64bintrin-builtins.h new file mode 100644 --- /dev/null +++ b/clang/lib/Headers/rv64bintrin-builtins.h @@ -0,0 +1,96 @@ +/* ===-------- rv64bintrin-builtins.h --------------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __RV64BINTRIN_BUILTINS_H +#define __RV64BINTRIN_BUILTINS_H + +#ifndef __RVINTRIN_H +#error \ + "Never use directly; include instead." +#endif + +#include + +// These are available regardless of whether or not the Bitmanip extension is +// enabled or not. + +// Our approach to these functions is to use inline asm when the Bitmanip +// extension is enabled and use replacment C code when it's disabled. However, +// where LLVM builtins are already avaiable we are making use of them. 
+ +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_clz(const uint_xlen_t rs1) { + if (rs1 == 0) { + return XLEN; + } + return __builtin_clzll(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_ctz(const uint_xlen_t rs1) { + if (rs1 == 0) { + return XLEN; + } + return __builtin_ctzll(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_pcnt(const uint_xlen_t rs1) { + return __builtin_popcountll(rs1); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_clzw(const uint32_t rs1) { + if (rs1 == 0) { + return XLEN; + } + return __builtin_clz(rs1); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_ctzw(const uint32_t rs1) { + if (rs1 == 0) { + return XLEN; + } + return __builtin_ctz(rs1); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_pcntw(const uint32_t rs1) { + return __builtin_popcount(rs1); +} + +// Genric aliases +// e.g. _rv_* is an alias of _rv64_* + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_clz(const uint_xlen_t rs1) { + return _rv64_clz(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_ctz(const uint_xlen_t rs1) { + return _rv64_ctz(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_pcnt(const uint_xlen_t rs1) { + return _rv64_pcnt(rs1); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_clzw(const uint32_t rs1) { + return _rv64_clzw(rs1); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_ctzw(const uint32_t rs1) { + return _rv64_ctzw(rs1); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_pcntw(const uint32_t rs1) { + return _rv64_pcntw(rs1); +} + +#endif //__RV64BINTRIN_BUILTINS_H diff --git a/clang/lib/Headers/rv64bintrin-emulation.h b/clang/lib/Headers/rv64bintrin-emulation.h new file mode 100644 --- /dev/null +++ b/clang/lib/Headers/rv64bintrin-emulation.h @@ -0,0 +1,433 @@ +/* ===-------- rv64bintrin-emulation.h -------------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __RV64INTRIN_EMULATION_H +#define __RV64INTRIN_EMULATION_H + +#ifndef __RVINTRIN_H +#error \ + "Never use directly; include instead." +#endif + +#include + +#include "rvbintrin-emulation.h" + +static __inline__ uint32_t shuffle32_stage(const uint32_t src, + const uint32_t maskL, + const uint32_t maskR, + const uint_xlen_t N) { + uint32_t x = src & ~(maskL | maskR); + x |= ((src << N) & maskL) | ((src >> N) & maskR); + return x; +} + +static __inline__ uint64_t __DEFAULT_FN_ATTRS _rv_shfl(const uint64_t rs1, + const uint64_t rs2) { + uint64_t x = rs1; + const uint_xlen_t shamt = rs2 & 31; + if (shamt & 16) { + x = shuffle_stage(x, 0x0000ffff00000000LL, 0x00000000ffff0000LL, 16); + } + if (shamt & 8) { + x = shuffle_stage(x, 0x00ff000000ff0000LL, 0x0000ff000000ff00LL, 8); + } + if (shamt & 4) { + x = shuffle_stage(x, 0x0f000f000f000f00LL, 0x00f000f000f000f0LL, 4); + } + if (shamt & 2) { + x = shuffle_stage(x, 0x3030303030303030LL, 0x0c0c0c0c0c0c0c0cLL, 2); + } + if (shamt & 1) { + x = shuffle_stage(x, 0x4444444444444444LL, 0x2222222222222222LL, 1); + } + + return x; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_shflw(const uint32_t rs1, + const uint32_t rs2) { + uint32_t x = rs1; + const uint_xlen_t shamt = rs2 & 15; + if (shamt & 8) { + x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8); + } + if (shamt & 4) { + x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4); + } + if (shamt & 2) { + x = shuffle32_stage(x, 0x30303030, 0x0c0c0c0c, 2); + } + if (shamt & 1) { + x = shuffle32_stage(x, 0x44444444, 0x22222222, 1); + } + return x; +} + +static __inline__ uint64_t __DEFAULT_FN_ATTRS _rv_bmatflip(const uint64_t rs1) { + uint64_t x = rs1; + x = _rv_shfl(x, 31); + x = _rv_shfl(x, 31); + x = _rv_shfl(x, 31); + return x; +} + +static 
__inline__ uint64_t __DEFAULT_FN_ATTRS _rv_bmator(const uint64_t rs1, + const uint64_t rs2) { + // transpose of rs2 + const uint64_t rs2t = _rv_bmatflip(rs2); + uint8_t u[8]; // rows of rs1 + uint8_t v[8]; // cols of rs2 + for (uint_xlen_t i = 0; i < 8; i++) { + u[i] = rs1 >> (i * 8); + v[i] = rs2t >> (i * 8); + } + uint64_t x = 0; + for (uint_xlen_t i = 0; i < 8; i++) { + if ((u[i / 8] & v[i % 8]) != 0) { + x |= 1LL << i; + } + } + return x; +} + +static __inline__ uint64_t __DEFAULT_FN_ATTRS _rv_bmatxor(const uint64_t rs1, + const uint64_t rs2) { + // transpose of rs2 + const uint64_t rs2t = _rv_bmatflip(rs2); + uint8_t u[8]; // rows of rs1 + uint8_t v[8]; // cols of rs2 + for (uint_xlen_t i = 0; i < 8; i++) { + u[i] = rs1 >> (i * 8); + v[i] = rs2t >> (i * 8); + } + uint64_t x = 0; + for (uint_xlen_t i = 0; i < 64; i++) { + if (_rv_pcnt(u[i / 8] & v[i % 8]) & 1) { + x |= 1LL << i; + } + } + return x; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_fslw(const uint32_t rs1, + const uint32_t rs2, + const uint32_t rs3) { + uint32_t shamt = rs2 & (2 * XLEN - 1); + uint32_t A = rs1; + uint32_t B = rs3; + if (shamt >= XLEN) { + shamt -= XLEN; + A = rs3; + B = rs1; + } + return shamt ? (A << shamt) | (B >> (XLEN - shamt)) : A; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_fsrw(const uint32_t rs1, + const uint32_t rs2, + const uint32_t rs3) { + uint32_t shamt = rs2 & (2 * XLEN - 1); + uint32_t A = rs1; + uint32_t B = rs3; + if (shamt >= XLEN) { + shamt -= XLEN; + A = rs3; + B = rs1; + } + return shamt ? 
(A >> shamt) | (B << (XLEN - shamt)) : A; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_rolw(const uint32_t rs1, + const uint32_t rs2) { + const uint32_t shamt = rs2 & (XLEN - 1); + return (rs1 << shamt) | (rs1 >> ((XLEN - shamt) & (XLEN - 1))); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_rorw(const uint32_t rs1, + const uint32_t rs2) { + const uint32_t shamt = rs2 & (XLEN - 1); + return (rs1 >> shamt) | (rs1 << ((XLEN - shamt) & (XLEN - 1))); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_addu_w(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return rs1 + (rs2 & 0xFFFFFFFF); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_addwu(const uint32_t rs1, + const uint32_t rs2) { + return (rs1 + rs2) & 0xFFFFFFFF; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_bdepw(const uint32_t rs1, + const uint32_t rs2) { + uint32_t r = 0; + for (uint_xlen_t i = 0, j = 0; i < XLEN; i++) { + if ((rs2 >> i) & 1) { + if ((rs1 >> j) & 1) { + r |= 1 << i; + } + j++; + } + } + return r; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_bextw(const uint32_t rs1, + const uint32_t rs2) { + uint32_t r = 0; + for (uint_xlen_t i = 0, j = 0; i < XLEN; i++) { + if ((rs2 >> i) & 1) { + if ((rs1 >> i) & 1) { + r |= 1 << j; + } + j++; + } + } + return r; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_bfpw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t len = (rs2 >> 24) & 15; + const uint32_t off = (rs2 >> 16) & (XLEN - 1); + len = len ? 
len : 16; + const uint32_t mask = _rv_rol(_rv_slo(0, len), off); + const uint32_t data = _rv_rol(rs2, off); + return (data & mask) | (rs1 & ~mask); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_clmulhw(const uint32_t rs1, + const uint32_t rs2) { + uint32_t x = 0; + for (uint_xlen_t i = 1; i < XLEN; i++) { + if ((rs2 >> i) & 1) { + x ^= rs1 >> (XLEN - i); + } + } + return x; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_clmulrw(const uint32_t rs1, + const uint32_t rs2) { + uint32_t x = 0; + for (uint_xlen_t i = 0; i < XLEN; i++) { + if ((rs2 >> i) & 1) { + x ^= rs1 >> (XLEN - i - 1); + } + } + return x; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_clmulw(const uint32_t rs1, + const uint32_t rs2) { + uint32_t x = 0; + for (uint_xlen_t i = 0; i < XLEN; i++) { + if ((rs2 >> i) & 1) { + x ^= rs1 << i; + } + } + return x; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_crc32_d(const uint_xlen_t rs1) { + return crc32(rs1, 64); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_crc32c_d(const uint_xlen_t rs1) { + return crc32c(rs1, 64); +} + +static __inline__ uint64_t __DEFAULT_FN_ATTRS _rv_gorc(const uint64_t rs1, + const uint64_t rs2) { + uint64_t x = rs1; + const uint_xlen_t shamt = rs2 & 63; + if (shamt & 1) + x |= ((x & 0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1); + if (shamt & 2) + x |= ((x & 0x3333333333333333LL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2); + if (shamt & 4) + x |= ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4); + if (shamt & 8) + x |= ((x & 0x00FF00FF00FF00FFLL) << 8) | ((x & 0xFF00FF00FF00FF00LL) >> 8); + if (shamt & 16) + x |= + ((x & 0x0000FFFF0000FFFFLL) << 16) | ((x & 0xFFFF0000FFFF0000LL) >> 16); + if (shamt & 32) + x |= + ((x & 0x00000000FFFFFFFFLL) << 32) | ((x & 0xFFFFFFFF00000000LL) >> 32); + return x; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_gorcw(const uint32_t rs1, + const uint32_t rs2) { + uint32_t x = rs1; + const uint_xlen_t 
shamt = rs2 & 31; + if (shamt & 1) { + x |= ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1); + } + if (shamt & 2) { + x |= ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2); + } + if (shamt & 4) { + x |= ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4); + } + if (shamt & 8) { + x |= ((x & 0x00FF00FF) << 8) | ((x & 0xFF00FF00) >> 8); + } + if (shamt & 16) { + x |= ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >> 16); + } + return x; +} + +static __inline__ uint64_t __DEFAULT_FN_ATTRS _rv_grev(const uint64_t rs1, + const uint64_t rs2) { + uint64_t x = rs1; + const uint_xlen_t shamt = rs2 & 63; + if (shamt & 1) { + x = ((x & 0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1); + } + if (shamt & 2) { + x = ((x & 0x3333333333333333LL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2); + } + if (shamt & 4) { + x = ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4); + } + if (shamt & 8) { + x = ((x & 0x00FF00FF00FF00FFLL) << 8) | ((x & 0xFF00FF00FF00FF00LL) >> 8); + } + if (shamt & 16) { + x = ((x & 0x0000FFFF0000FFFFLL) << 16) | ((x & 0xFFFF0000FFFF0000LL) >> 16); + } + if (shamt & 32) { + x = ((x & 0x00000000FFFFFFFFLL) << 32) | ((x & 0xFFFFFFFF00000000LL) >> 32); + } + return x; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_grevw(const uint32_t rs1, + const uint32_t rs2) { + uint32_t x = rs1; + const uint_xlen_t shamt = rs2 & 31; + if (shamt & 1) { + x = ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1); + } + if (shamt & 2) { + x = ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2); + } + if (shamt & 4) { + x = ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4); + } + if (shamt & 8) { + x = ((x & 0x00FF00FF) << 8) | ((x & 0xFF00FF00) >> 8); + } + if (shamt & 16) { + x = ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >> 16); + } + return x; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_packw(const uint32_t rs1, + const uint32_t rs2) { + // XLEN / 4 to get half of a 32 bit value on riscv64 + return (rs2 << XLEN / 4) | ((rs1 << 
XLEN / 4) >> XLEN / 4); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_packuw(const uint32_t rs1, + const uint32_t rs2) { + // XLEN / 4 to get half of a 32 bit value on riscv64 + return (rs1 >> XLEN / 4) | ((rs2 >> XLEN / 4) << XLEN / 4) +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_sbclrw(const uint32_t rs1, + const uint32_t rs2) { + return rs1 & ~(1 << (rs2 & (XLEN - 1))); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_sbextw(const uint32_t rs1, + const uint32_t rs2) { + return 1 & (rs1 >> (rs2 & (XLEN - 1))); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_sbinvw(const uint32_t rs1, + const uint32_t rs2) { + return rs1 ^ (1 << (rs2 & (XLEN - 1))); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_sbsetw(const uint32_t rs1, + const uint32_t rs2) { + return rs1 | (1 << (rs2 & (XLEN - 1))); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_slow(const uint32_t rs1, + const uint32_t rs2) { + return ~(~rs1 << (rs2 & (XLEN - 1))); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_srow(const uint32_t rs1, + const uint32_t rs2) { + return ~(~rs1 >> (rs2 & (XLEN - 1))); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_subu_w(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return rs1 - (rs2 & 0xFFFFFFFF); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_subwu(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return (rs1 - rs2) & 0xFFFFFFFF; +} + +static __inline__ uint64_t __DEFAULT_FN_ATTRS _rv_unshfl(const uint64_t rs1, + const uint64_t rs2) { + uint64_t x = rs1; + const uint_xlen_t shamt = rs2 & 31; + if (shamt & 1) { + x = shuffle_stage(x, 0x4444444444444444LL, 0x2222222222222222LL, 1); + } + if (shamt & 2) { + x = shuffle_stage(x, 0x3030303030303030LL, 0x0c0c0c0c0c0c0c0cLL, 2); + } + if (shamt & 4) { + x = shuffle_stage(x, 0x0f000f000f000f00LL, 0x00f000f000f000f0LL, 4); + } + if (shamt & 8) { + x = shuffle_stage(x, 0x00ff000000ff0000LL, 0x0000ff000000ff00LL, 8); + } + if (shamt & 16) { + x = 
shuffle_stage(x, 0x0000ffff00000000LL, 0x00000000ffff0000LL, 16); + } + return x; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_unshflw(const uint32_t rs1, + const uint32_t rs2) { + uint32_t x = rs1; + const uint_xlen_t shamt = rs2 & 15; + if (shamt & 1) { + x = shuffle32_stage(x, 0x44444444, 0x22222222, 1); + } + if (shamt & 2) { + x = shuffle32_stage(x, 0x30303030, 0x0c0c0c0c, 2); + } + if (shamt & 4) { + x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4); + } + if (shamt & 8) { + x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8); + } + return x; +} + +#endif //__RV64INTRIN_EMULATION_H diff --git a/clang/lib/Headers/rv64bintrin.h b/clang/lib/Headers/rv64bintrin.h new file mode 100644 --- /dev/null +++ b/clang/lib/Headers/rv64bintrin.h @@ -0,0 +1,1127 @@ +/* ===-------- rv64bintrin.h -----------------------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __RV64BINTRIN_H +#define __RV64BINTRIN_H + +#ifndef __RVINTRIN_H +#error "Never use directly; include instead." +#endif + +#include + +#include "rv64bintrin-builtins.h" + +#if defined(__riscv_bitmanip) + +#include "rv64bintrin-asm.h" + +#else // Bitmanip extension is disabled. 
+ +#include "rv64bintrin-emulation.h" + +#endif // defined(__riscv_bitmanip) + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_andn(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_andn(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_fsl(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) { + return _rv_fsl(rs1, rs2, rs3); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_fsr(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) { + return _rv_fsr(rs1, rs2, rs3); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_max(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_max(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_maxu(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_maxu(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_min(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_min(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_minu(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_minu(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_orn(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_orn(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rol(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_rol(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_ror(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_ror(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_bdep(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_bdep(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_bext(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_bext(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_bfp(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_bfp(rs1, rs2); +} + +static 
__inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_clmul(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_clmul(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_clmulh(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_clmulh(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_clmulr(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_clmulr(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS _rv64_cmix( + const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) { + return _rv_cmix(rs1, rs2, rs3); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS _rv64_cmov( + const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) { + return _rv_cmov(rs1, rs2, rs3); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_crc32_b(const uint_xlen_t rs1) { + return _rv_crc32_b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_crc32_h(const uint_xlen_t rs1) { + return _rv_crc32_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_crc32_w(const uint_xlen_t rs1) { + return _rv_crc32_w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_crc32c_b(const uint_xlen_t rs1) { + return _rv_crc32c_b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_crc32c_h(const uint_xlen_t rs1) { + return _rv_crc32c_b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_crc32c_w(const uint_xlen_t rs1) { + return _rv_crc32c_b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_gorc(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_gorc(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_grev(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_grev(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_pack(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_pack(rs1, rs2); +} + +static __inline__ uint_xlen_t 
__DEFAULT_FN_ATTRS +_rv64_packu(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_packu(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_packh(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_packh(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_sext_b(const uint_xlen_t rs1) { + return _rv_sext_b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_sext_h(const uint_xlen_t rs1) { + return _rv_sext_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_sbclr(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_sbclr(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_sbext(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_sbext(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_sbinv(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_sbinv(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_sbset(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_sbset(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_shfl(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_shfl(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_slo(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_slo(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_sro(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_sro(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_unshfl(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_unshfl(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_xnor(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_xnor(rs1, rs2); +} + +// 64 bit only + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_fslw(const uint32_t rs1, + const uint32_t rs2, + const uint32_t rs3) { + return _rv_fslw(rs1, rs2, rs3); +} + +static 
__inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_fsrw(const uint32_t rs1, + const uint32_t rs2, + const uint32_t rs3) { + return _rv_fsrw(rs1, rs2, rs3); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_rolw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_rolw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_rorw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_rorw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_addu_w(const uint32_t rs1, + const uint32_t rs2) { + return _rv_addu_w(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_addwu(const uint32_t rs1, + const uint32_t rs2) { + return _rv_addwu(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_bdepw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_bdepw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_bfpw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_bfpw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_bextw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_bextw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS +_rv64_bmatflip(const uint32_t rs1) { + return _rv_bmatflip(rs1); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_bmator(const uint32_t rs1, + const uint32_t rs2) { + return _rv_bmator(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS +_rv64_bmatxor(const uint32_t rs1, const uint32_t rs2) { + return _rv_bmatxor(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS +_rv64_clmulhw(const uint32_t rs1, const uint32_t rs2) { + return _rv_clmulhw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS +_rv64_clmulrw(const uint32_t rs1, const uint32_t rs2) { + return _rv_clmulrw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_clmulw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_clmulw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS 
_rv64_crc32_d(const uint32_t rs1) { + return _rv_crc32_d(rs1); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS +_rv64_crc32c_d(const uint32_t rs1) { + return _rv_crc32c_d(rs1); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_gorcw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_gorcw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_grevw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_grevw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_packw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_packw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_packuw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_packuw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_sbclrw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_sbclrw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_sbextw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_sbextw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_sbinvw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_sbinvw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_sbsetw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_sbsetw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_shflw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_shflw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_slow(const uint32_t rs1, + const uint32_t rs2) { + return _rv_slow(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_srow(const uint32_t rs1, + const uint32_t rs2) { + return _rv_srow(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_subu_w(const uint32_t rs1, + const uint32_t rs2) { + return _rv_subu_w(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_subwu(const uint32_t rs1, + const uint32_t rs2) { + return _rv_subwu(rs1, rs2); +} + +static 
__inline__ uint32_t __DEFAULT_FN_ATTRS +_rv64_unshflw(const uint32_t rs1, const uint32_t rs2) { + return _rv_unshflw(rs1, rs2); +} + +// Pseudo Instructions + +// REV (GREVI) pseudo instructions + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev_p(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b000001); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev2_n(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b000010); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev_n(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b000011); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev4_b(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b000100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev2_b(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b000110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev_b(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b000111); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev8_h(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b001000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev4_h(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b001100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev2_h(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b001110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev_h(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b001111); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev16_w(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b010000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev8_w(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b011000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev4_w(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b011100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev2_w(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 
0b011110);
+}
+// Remaining REV wrappers: each passes rs1 to _rv64_grev with a fixed control.
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_rev_w(const uint_xlen_t rs1) {
+  return _rv64_grev(rs1, 0b011111);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_rev32(const uint_xlen_t rs1) {
+  return _rv64_grev(rs1, 0b100000);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_rev16(const uint_xlen_t rs1) {
+  return _rv64_grev(rs1, 0b110000);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_rev8(const uint_xlen_t rs1) {
+  return _rv64_grev(rs1, 0b111000);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_rev4(const uint_xlen_t rs1) {
+  return _rv64_grev(rs1, 0b111100);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_rev2(const uint_xlen_t rs1) {
+  return _rv64_grev(rs1, 0b111110);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_rev(const uint_xlen_t rs1) {
+  return _rv64_grev(rs1, 0b111111);
+}
+
+// ORC (GORCI) pseudo instructions
+// Each wrapper passes rs1 to _rv64_gorc with a fixed 6-bit control constant.
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_orc_p(const uint_xlen_t rs1) {
+  return _rv64_gorc(rs1, 0b000001);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_orc2_n(const uint_xlen_t rs1) {
+  return _rv64_gorc(rs1, 0b000010);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_orc_n(const uint_xlen_t rs1) {
+  return _rv64_gorc(rs1, 0b000011);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_orc4_b(const uint_xlen_t rs1) {
+  return _rv64_gorc(rs1, 0b000100);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_orc2_b(const uint_xlen_t rs1) {
+  return _rv64_gorc(rs1, 0b000110);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_orc_b(const uint_xlen_t rs1) {
+  return _rv64_gorc(rs1, 0b000111);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_orc8_h(const uint_xlen_t rs1) {
+  return _rv64_gorc(rs1, 0b001000);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_orc4_h(const uint_xlen_t rs1) {
+  return _rv64_gorc(rs1, 0b001100);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_orc2_h(const uint_xlen_t rs1) {
+  return _rv64_gorc(rs1, 0b001110);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_orc_h(const uint_xlen_t rs1) {
+  return _rv64_gorc(rs1, 0b001111);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_orc16_w(const uint_xlen_t rs1) {
+  return _rv64_gorc(rs1, 0b010000);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_orc8_w(const uint_xlen_t rs1) {
+  return _rv64_gorc(rs1, 0b011000);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_orc4_w(const uint_xlen_t rs1) {
+  return _rv64_gorc(rs1, 0b011100);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_orc2_w(const uint_xlen_t rs1) {
+  return _rv64_gorc(rs1, 0b011110);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_orc_w(const uint_xlen_t rs1) {
+  return _rv64_gorc(rs1, 0b011111);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_orc32(const uint_xlen_t rs1) {
+  return _rv64_gorc(rs1, 0b100000);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_orc16(const uint_xlen_t rs1) {
+  return _rv64_gorc(rs1, 0b110000);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_orc8(const uint_xlen_t rs1) {
+  return _rv64_gorc(rs1, 0b111000);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_orc4(const uint_xlen_t rs1) {
+  return _rv64_gorc(rs1, 0b111100);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_orc2(const uint_xlen_t rs1) {
+  return _rv64_gorc(rs1, 0b111110);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_orc(const uint_xlen_t rs1) {
+  return _rv64_gorc(rs1, 0b111111);
+}
+
+// ZIP (SHFLI) pseudo instructions: rs1 goes to _rv64_shfl with a 5-bit control
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_zip_n(const uint_xlen_t rs1) {
+  return _rv64_shfl(rs1, 0b00001);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_zip2_b(const uint_xlen_t rs1) {
+  return _rv64_shfl(rs1, 0b00010);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_zip_b(const uint_xlen_t rs1) {
+  return _rv64_shfl(rs1, 0b00011);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_zip4_h(const uint_xlen_t rs1) {
+  return _rv64_shfl(rs1, 0b00100);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_zip2_h(const uint_xlen_t rs1) {
+  return _rv64_shfl(rs1, 0b00110);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_zip_h(const uint_xlen_t rs1) {
+  return _rv64_shfl(rs1, 0b00111);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_zip8_w(const uint_xlen_t rs1) {
+  return _rv64_shfl(rs1, 0b01000);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_zip4_w(const uint_xlen_t rs1) {
+  return _rv64_shfl(rs1, 0b01100);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_zip2_w(const uint_xlen_t rs1) {
+  return _rv64_shfl(rs1, 0b01110);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_zip_w(const uint_xlen_t rs1) {
+  return _rv64_shfl(rs1, 0b01111);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_zip16(const uint_xlen_t rs1) {
+  return _rv64_shfl(rs1, 0b10000);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_zip8(const uint_xlen_t rs1) {
+  return _rv64_shfl(rs1, 0b11000);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_zip4(const uint_xlen_t rs1) {
+  return _rv64_shfl(rs1, 0b11100);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_zip2(const uint_xlen_t rs1) {
+  return _rv64_shfl(rs1, 0b11110);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_zip(const uint_xlen_t rs1) {
+  return _rv64_shfl(rs1, 0b11111);
+}
+
+// UNZIP (UNSHFL) pseudo instructions
+// Each control constant equals that of the zip wrapper with the same suffix.
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip_n(const uint_xlen_t rs1) {
+  return _rv64_unshfl(rs1, 0b00001);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip2_b(const uint_xlen_t rs1) {
+  return _rv64_unshfl(rs1, 0b00010);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip_b(const uint_xlen_t rs1) {
+  return _rv64_unshfl(rs1, 0b00011);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip4_h(const uint_xlen_t rs1) {
+  return _rv64_unshfl(rs1, 0b00100);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip2_h(const uint_xlen_t rs1) {
+  return _rv64_unshfl(rs1, 0b00110);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip_h(const uint_xlen_t rs1) {
+  return _rv64_unshfl(rs1, 0b00111);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip8_w(const uint_xlen_t rs1) {
+  return _rv64_unshfl(rs1, 0b01000);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip4_w(const uint_xlen_t rs1) {
+  return _rv64_unshfl(rs1, 0b01100);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip2_w(const uint_xlen_t rs1) {
+  return _rv64_unshfl(rs1, 0b01110);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip_w(const uint_xlen_t rs1) {
+  return _rv64_unshfl(rs1, 0b01111);
+}
+// Control is 0b10000, the same as _rv64_zip16 (0b11000 belongs to unzip8).
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip16(const uint_xlen_t rs1) {
+  return _rv64_unshfl(rs1, 0b10000);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip8(const uint_xlen_t rs1) {
+  return _rv64_unshfl(rs1, 0b11000);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip4(const uint_xlen_t rs1) {
+  return _rv64_unshfl(rs1, 0b11100);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip2(const uint_xlen_t rs1) {
+  return _rv64_unshfl(rs1, 0b11110);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip(const uint_xlen_t rs1) {
+  return _rv64_unshfl(rs1, 0b11111);
+}
+
+// Generic aliases for 64 bit pseudo instructions
+// e.g.
_rv_* is an alias of _rv64_*
+// Every alias below forwards rs1 unchanged to the _rv64_ function it names.
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_rev_p(const uint_xlen_t rs1) {
+  return _rv64_rev_p(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_rev2_n(const uint_xlen_t rs1) {
+  return _rv64_rev2_n(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_rev_n(const uint_xlen_t rs1) {
+  return _rv64_rev_n(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_rev4_b(const uint_xlen_t rs1) {
+  return _rv64_rev4_b(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_rev2_b(const uint_xlen_t rs1) {
+  return _rv64_rev2_b(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_rev_b(const uint_xlen_t rs1) {
+  return _rv64_rev_b(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_rev8_h(const uint_xlen_t rs1) {
+  return _rv64_rev8_h(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_rev4_h(const uint_xlen_t rs1) {
+  return _rv64_rev4_h(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_rev2_h(const uint_xlen_t rs1) {
+  return _rv64_rev2_h(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_rev_h(const uint_xlen_t rs1) {
+  return _rv64_rev_h(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_rev16_w(const uint_xlen_t rs1) {
+  return _rv64_rev16_w(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_rev8_w(const uint_xlen_t rs1) {
+  return _rv64_rev8_w(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_rev4_w(const uint_xlen_t rs1) {
+  return _rv64_rev4_w(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_rev2_w(const uint_xlen_t rs1) {
+  return _rv64_rev2_w(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_rev_w(const uint_xlen_t rs1) {
+  return _rv64_rev_w(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_rev32(const uint_xlen_t rs1) {
+  return _rv64_rev32(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_rev16(const uint_xlen_t rs1) {
+  return _rv64_rev16(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_rev8(const uint_xlen_t rs1) {
+  return _rv64_rev8(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_rev4(const uint_xlen_t rs1) {
+  return _rv64_rev4(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_rev2(const uint_xlen_t rs1) {
+  return _rv64_rev2(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_rev(const uint_xlen_t rs1) {
+  return _rv64_rev(rs1);
+}
+
+// ORC (GORCI) pseudo instructions
+// _rv_orc* aliases: each returns the result of the matching _rv64_orc* call.
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_orc_p(const uint_xlen_t rs1) {
+  return _rv64_orc_p(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_orc2_n(const uint_xlen_t rs1) {
+  return _rv64_orc2_n(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_orc_n(const uint_xlen_t rs1) {
+  return _rv64_orc_n(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_orc4_b(const uint_xlen_t rs1) {
+  return _rv64_orc4_b(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_orc2_b(const uint_xlen_t rs1) {
+  return _rv64_orc2_b(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_orc_b(const uint_xlen_t rs1) {
+  return _rv64_orc_b(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_orc8_h(const uint_xlen_t rs1) {
+  return _rv64_orc8_h(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_orc4_h(const uint_xlen_t rs1) {
+  return _rv64_orc4_h(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_orc2_h(const uint_xlen_t rs1) {
+  return _rv64_orc2_h(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_orc_h(const uint_xlen_t rs1) {
+  return _rv64_orc_h(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_orc16_w(const uint_xlen_t rs1) {
+  return _rv64_orc16_w(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_orc8_w(const uint_xlen_t rs1) {
+  return _rv64_orc8_w(rs1);
+}
+ +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc4_w(const uint_xlen_t rs1) { + return _rv64_orc4_w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc2_w(const uint_xlen_t rs1) { + return _rv64_orc2_w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc_w(const uint_xlen_t rs1) { + return _rv64_orc_w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc32(const uint_xlen_t rs1) { + return _rv64_orc32(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc16(const uint_xlen_t rs1) { + return _rv64_orc16(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc8(const uint_xlen_t rs1) { + return _rv64_orc8(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc4(const uint_xlen_t rs1) { + return _rv64_orc4(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc2(const uint_xlen_t rs1) { + return _rv64_orc2(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc(const uint_xlen_t rs1) { + return _rv64_orc(rs1); +} + +// ZIP (SHFLI) pseudo instructions + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip_n(const uint_xlen_t rs1) { + return _rv64_zip_n(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip2_b(const uint_xlen_t rs1) { + return _rv64_zip2_b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip_b(const uint_xlen_t rs1) { + return _rv64_zip_b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip4_h(const uint_xlen_t rs1) { + return _rv64_zip4_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip2_h(const uint_xlen_t rs1) { + return _rv64_zip2_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip_h(const uint_xlen_t rs1) { + return _rv64_zip_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip8_w(const uint_xlen_t rs1) { + return _rv64_zip8_w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip4_w(const 
uint_xlen_t rs1) { + return _rv64_zip4_w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip2_w(const uint_xlen_t rs1) { + return _rv64_zip2_w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip_w(const uint_xlen_t rs1) { + return _rv64_zip_w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip16(const uint_xlen_t rs1) { + return _rv64_zip16(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip8(const uint_xlen_t rs1) { + return _rv64_zip8(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip4(const uint_xlen_t rs1) { + return _rv64_zip4(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip2(const uint_xlen_t rs1) { + return _rv64_zip2(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip(const uint_xlen_t rs1) { + return _rv64_zip(rs1); +} + +// UNZIP (UNSHFL) pseudo instructions + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip_n(const uint_xlen_t rs1) { + return _rv64_unzip_n(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip2_b(const uint_xlen_t rs1) { + return _rv64_unzip2_b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip_b(const uint_xlen_t rs1) { + return _rv64_unzip_b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip4_h(const uint_xlen_t rs1) { + return _rv64_unzip4_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip2_h(const uint_xlen_t rs1) { + return _rv64_unzip2_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip_h(const uint_xlen_t rs1) { + return _rv64_unzip_h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip8_w(const uint_xlen_t rs1) { + return _rv64_unzip8_w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip4_w(const uint_xlen_t rs1) { + return _rv64_unzip4_w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip2_w(const uint_xlen_t rs1) { + return 
_rv64_unzip2_w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip_w(const uint_xlen_t rs1) { + return _rv64_unzip_w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip16(const uint_xlen_t rs1) { + return _rv64_unzip16(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip8(const uint_xlen_t rs1) { + return _rv64_unzip8(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip4(const uint_xlen_t rs1) { + return _rv64_unzip4(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip2(const uint_xlen_t rs1) { + return _rv64_unzip2(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip(const uint_xlen_t rs1) { + return _rv64_unzip(rs1); +} + +#endif // __RV64BINTRIN_H diff --git a/clang/lib/Headers/rvbintrin-asm.h b/clang/lib/Headers/rvbintrin-asm.h new file mode 100644 --- /dev/null +++ b/clang/lib/Headers/rvbintrin-asm.h @@ -0,0 +1,343 @@ +/* ===-------- rvbintrin-asm.h ---------------------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __RVBINTRIN_ASM_H +#define __RVBINTRIN_ASM_H + +#ifndef __RVINTRIN_H +#error "Never use directly; include instead." 
+#endif + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_andn(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + __asm__("andn %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_fsl(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) { + uint_xlen_t rd; + __asm__("fsl %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs2), "r"(rs3)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_fsr(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("fsri %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs2), "i"(rs3)); + } else { + __asm__("fsr %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs2), "r"(rs3)); + } + return rd; +} + +static __inline__ int_xlen_t _rv_max(const int_xlen_t rs1, + const int_xlen_t rs2) { + int_xlen_t rd; + __asm__("max %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_maxu(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + __asm__("maxu %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ int_xlen_t _rv_min(const int_xlen_t rs1, + const int_xlen_t rs2) { + int_xlen_t rd; + __asm__("min %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_minu(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + __asm__("minu %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orn(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + __asm__("orn %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rol(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + __asm__("rol %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return 
rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_ror(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("rori %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("ror %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_bdep(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + __asm__("bdep %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_bext(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + __asm__("bext %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_bfp(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + __asm__("bfp %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_clmul(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + __asm__("clmul %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_clmulh(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + __asm__("clmulh %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_clmulr(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + __asm__("clmulr %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_cmix(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) { + uint_xlen_t rd; + __asm__("cmix %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs2), "r"(rs3)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_cmov(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) { + uint_xlen_t rd; + __asm__("cmov %0, %1, %2, %3" : 
"=r"(rd) : "r"(rs1), "r"(rs2), "r"(rs3)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_crc32_b(const uint_xlen_t rs1) { + uint_xlen_t rd; + __asm__("crc32.b %0, %1" : "=r"(rd) : "r"(rs1)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_crc32_h(const uint_xlen_t rs1) { + uint_xlen_t rd; + __asm__("crc32.h %0, %1" : "=r"(rd) : "r"(rs1)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_crc32_w(const uint_xlen_t rs1) { + uint_xlen_t rd; + __asm__("crc32.w %0, %1" : "=r"(rd) : "r"(rs1)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_crc32c_b(const uint_xlen_t rs1) { + uint_xlen_t rd; + __asm__("crc32c.b %0, %1" : "=r"(rd) : "r"(rs1)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_crc32c_h(const uint_xlen_t rs1) { + uint_xlen_t rd; + __asm__("crc32c.h %0, %1" : "=r"(rd) : "r"(rs1)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_crc32c_w(const uint_xlen_t rs1) { + uint_xlen_t rd; + __asm__("crc32c.w %0, %1" : "=r"(rd) : "r"(rs1)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_gorc(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("gorci %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("gorc %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_grev(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("grevi %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("grev %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_pack(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + __asm__("pack %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint_xlen_t 
__DEFAULT_FN_ATTRS +_rv_packu(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + __asm__("packu %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_packh(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + __asm__("packh %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_sext_b(const uint_xlen_t rs1) { + uint_xlen_t rd; + __asm__("sext.b %0, %1" : "=r"(rd) : "r"(rs1)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_sext_h(const uint_xlen_t rs1) { + uint_xlen_t rd; + __asm__("sext.h %0, %1" : "=r"(rd) : "r"(rs1)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_sbclr(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("sbclri %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("sbclr %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_sbext(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("sbexti %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("sbext %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_sbinv(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("sbinvi %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("sbinv %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_sbset(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("sbseti %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("sbset %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + 
} + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_shfl(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("shfli %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("shfl %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_slo(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("sloi %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("slo %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_sro(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("sroi %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("sro %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unshfl(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("unshfli %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("unshfl %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_xnor(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return rs1 ^ ~rs2; +} + +#endif // __RVBINTRIN_ASM_H diff --git a/clang/lib/Headers/rvbintrin-emulation.h b/clang/lib/Headers/rvbintrin-emulation.h new file mode 100644 --- /dev/null +++ b/clang/lib/Headers/rvbintrin-emulation.h @@ -0,0 +1,285 @@ +/* ===-------- rvbintrin-emulation.h ---------------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __RVBINTRIN_EMULATION_H +#define __RVBINTRIN_EMULATION_H + +#ifndef __RVINTRIN_H +#error \ + "Never use directly; include instead." +#endif + +#include + +// If the builtins are unavailable then these provide the same functionality. + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_slo(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return ~(~rs1 << (rs2 & (XLEN - 1))); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_andn(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return rs1 & ~rs2; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_fsl(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) { + uint_xlen_t shamt = rs2 & (2 * XLEN - 1); + uint_xlen_t A = rs1; + uint_xlen_t B = rs3; + if (shamt >= XLEN) { + shamt -= XLEN; + A = rs3; + B = rs1; + } + return shamt ? (A << shamt) | (B >> (XLEN - shamt)) : A; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_fsr(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) { + uint_xlen_t shamt = rs2 & (2 * XLEN - 1); + uint_xlen_t A = rs1; + uint_xlen_t B = rs3; + if (shamt >= XLEN) { + shamt -= XLEN; + A = rs3; + B = rs1; + } + return shamt ? (A >> shamt) | (B << (XLEN - shamt)) : A; +} + +static __inline__ int_xlen_t _rv_max(const int_xlen_t rs1, + const int_xlen_t rs2) { + return rs1 > rs2 ? rs1 : rs2; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_maxu(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return rs1 > rs2 ? rs1 : rs2; +} + +static __inline__ int_xlen_t _rv_min(const int_xlen_t rs1, + const int_xlen_t rs2) { + return rs1 < rs2 ? rs1 : rs2; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_minu(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return rs1 < rs2 ? 
rs1 : rs2; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orn(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return rs1 | ~rs2; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rol(const uint_xlen_t rs1, const uint_xlen_t rs2) { + const uint_xlen_t shamt = rs2 & (XLEN - 1); + return (rs1 << shamt) | (rs1 >> ((XLEN - shamt) & (XLEN - 1))); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_ror(const uint_xlen_t rs1, const uint_xlen_t rs2) { + const uint_xlen_t shamt = rs2 & (XLEN - 1); + return (rs1 >> shamt) | (rs1 << ((XLEN - shamt) & (XLEN - 1))); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_bdep(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t r = 0; + for (uint_xlen_t i = 0, j = 0; i < XLEN; i++) { + if ((rs2 >> i) & 1) { + if ((rs1 >> j) & 1) { + r |= 1 << i; + } + j++; + } + } + return r; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_bext(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t r = 0; + for (uint_xlen_t i = 0, j = 0; i < XLEN; i++) { + if ((rs2 >> i) & 1) { + if ((rs1 >> i) & 1) { + r |= 1 << j; + } + j++; + } + } + return r; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_bfp(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t len = (rs2 >> 24) & 15; + const uint_xlen_t off = (rs2 >> 16) & (XLEN - 1); + len = len ? 
len : 16; + const uint_xlen_t mask = _rv_rol(_rv_slo(0, len), off); + const uint_xlen_t data = _rv_rol(rs2, off); + return (data & mask) | (rs1 & ~mask); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_clmul(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t x = 0; + for (uint_xlen_t i = 0; i < XLEN; i++) { + if ((rs2 >> i) & 1) { + x ^= rs1 << i; + } + } + return x; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_clmulh(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t x = 0; + for (uint_xlen_t i = 1; i < XLEN; i++) { + if ((rs2 >> i) & 1) { + x ^= rs1 >> (XLEN - i); + } + } + return x; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_clmulr(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t x = 0; + for (uint_xlen_t i = 0; i < XLEN; i++) { + if ((rs2 >> i) & 1) { + x ^= rs1 >> (XLEN - i - 1); + } + } + return x; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_cmix(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) { + return (rs1 & rs2) | (rs3 & ~rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_cmov(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) { + return rs2 ? 
rs1 : rs3; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +crc32(uint_xlen_t x, const uint_xlen_t nbits) { + for (uint_xlen_t i = 0; i < nbits; i++) { + x = (x >> 1) ^ (0xEDB88320 & ~((x & 1) - 1)); + } + return x; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +crc32c(uint_xlen_t x, const uint_xlen_t nbits) { + for (uint_xlen_t i = 0; i < nbits; i++) { + x = (x >> 1) ^ (0x82F63B78 & ~((x & 1) - 1)); + } + return x; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_crc32_b(const uint_xlen_t rs1) { + return crc32(rs1, 8); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_crc32_h(const uint_xlen_t rs1) { + return crc32(rs1, 16); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_crc32_w(const uint_xlen_t rs1) { + return crc32(rs1, 32); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_crc32c_b(const uint_xlen_t rs1) { + return crc32c(rs1, 8); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_crc32c_h(const uint_xlen_t rs1) { + return crc32c(rs1, 16); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_crc32c_w(const uint_xlen_t rs1) { + return crc32c(rs1, 32); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_pack(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return (rs2 << XLEN / 2) | ((rs1 << XLEN / 2) >> XLEN / 2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_packu(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return (rs1 >> XLEN / 2) | ((rs2 >> XLEN / 2) << XLEN / 2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_packh(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return (rs1 & 255) | ((rs2 & 255) << 8); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_sext_b(const uint_xlen_t rs1) { + return int_xlen_t(x << (XLEN - 8)) >> (XLEN - 8); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_sext_h(const uint_xlen_t rs1) { + return int_xlen_t(x << (XLEN - 16)) >> (XLEN - 16); +} + +static __inline__ uint_xlen_t 
__DEFAULT_FN_ATTRS +_rv_sbclr(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return rs1 & ~(1 << (rs2 & (XLEN - 1))); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_sbext(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return 1 & (rs1 >> (rs2 & (XLEN - 1))); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_sbinv(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return rs1 ^ (1 << (rs2 & (XLEN - 1))); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_sbset(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return rs1 | (1 << (rs2 & (XLEN - 1))); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +shuffle_stage(const uint_xlen_t src, const uint_xlen_t maskL, + const uint_xlen_t maskR, const uint_xlen_t N) { + uint_xlen_t x = src & ~(maskL | maskR); + x |= ((src << N) & maskL) | ((src >> N) & maskR); + return x; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_sro(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return ~(~rs1 >> (rs2 & (XLEN - 1))); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_xnor(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return rs1 ^ ~rs2; +} + +#endif //__RVBINTRIN_EMULATION_H diff --git a/clang/lib/Headers/rvintrin.h b/clang/lib/Headers/rvintrin.h new file mode 100644 --- /dev/null +++ b/clang/lib/Headers/rvintrin.h @@ -0,0 +1,39 @@ +/* ===-------- rvintrin.h --------------------------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __RVINTRIN_H
+#define __RVINTRIN_H
+
+// long is 32 bits on riscv32 and 64 bits on riscv64 (checked just below)
+#define int_xlen_t long
+#define uint_xlen_t unsigned int_xlen_t
+#define XLEN __riscv_xlen
+
+_Static_assert(__riscv_xlen == sizeof(uint_xlen_t) * 8,
+               "uint_xlen_t is not __riscv_xlen bits long");
+
+#define __DEFAULT_FN_ATTRS                                                     \
+  __attribute__((__always_inline__, __artificial__, __nodebug__))
+
+#if __riscv_xlen == 32 // dispatch on the macro XLEN is defined from
+
+#include "rv32bintrin.h"
+
+#elif __riscv_xlen == 64
+
+#include "rv64bintrin.h"
+
+#endif // __riscv_xlen
+
+#undef __DEFAULT_FN_ATTRS // helper macros are removed again after use
+
+#undef XLEN
+#undef uint_xlen_t
+#undef int_xlen_t
+#endif // __RVINTRIN_H
diff --git a/clang/test/Headers/rvintrin.c b/clang/test/Headers/rvintrin.c
new file mode 100644
--- /dev/null
+++ b/clang/test/Headers/rvintrin.c
@@ -0,0 +1,37 @@
+// RUN: %clang -fsyntax-only -ffreestanding --target=riscv32 -std=c89 -xc %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -std=c99 -xc %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -std=c11 -xc %s
+
+// RUN: %clang -fsyntax-only -ffreestanding --target=riscv64 -std=c89 -xc %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -std=c99 -xc %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -std=c11 -xc %s
+
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -std=c++98 -xc++ %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -std=c++11 -xc++ %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -std=c++14 -xc++ %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -std=c++17 -xc++ %s
+
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -std=c++98 -xc++ %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -std=c++11 -xc++ %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -std=c++14 -xc++ %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -std=c++17 -xc++ %s
+
+// RUN: %clang -fsyntax-only -ffreestanding --target=riscv32 -Xclang -target-feature -Xclang +b -std=c89 -xc %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -Xclang -target-feature -Xclang +b -std=c99 -xc %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -Xclang -target-feature -Xclang +b -std=c11 -xc %s
+
+// RUN: %clang -fsyntax-only -ffreestanding --target=riscv64 -Xclang -target-feature -Xclang +b -std=c89 -xc %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -Xclang -target-feature -Xclang +b -std=c99 -xc %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -Xclang -target-feature -Xclang +b -std=c11 -xc %s
+
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -Xclang -target-feature -Xclang +b -std=c++98 -xc++ %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -Xclang -target-feature -Xclang +b -std=c++11 -xc++ %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -Xclang -target-feature -Xclang +b -std=c++14 -xc++ %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -Xclang -target-feature -Xclang +b -std=c++17 -xc++ %s
+
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -Xclang -target-feature -Xclang +b -std=c++98 -xc++ %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -Xclang -target-feature -Xclang +b -std=c++11 -xc++ %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -Xclang -target-feature -Xclang +b -std=c++14 -xc++ %s
+// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -Xclang -target-feature -Xclang +b -std=c++17 -xc++ %s
+
+#include <rvintrin.h>
diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
--- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
@@ -132,6 +132,16 @@
     "ptwriteintrin.h",
     "rdseedintrin.h",
     "rtmintrin.h",
+    "rv32bintrin-builtins.h",
+    "rv32bintrin-emulation.h",
+    "rv32bintrin.h",
+    "rv64bintrin-asm.h",
+    "rv64bintrin-builtins.h",
+    "rv64bintrin-emulation.h",
+    "rv64bintrin.h",
+    "rvbintrin-asm.h",
+    "rvbintrin-emulation.h",
+    "rvintrin.h",
     "s390intrin.h",
     "sgxintrin.h",
     "shaintrin.h",