Index: clang-tools-extra/clang-include-fixer/find-all-symbols/STLPostfixHeaderMap.cpp =================================================================== --- clang-tools-extra/clang-include-fixer/find-all-symbols/STLPostfixHeaderMap.cpp +++ clang-tools-extra/clang-include-fixer/find-all-symbols/STLPostfixHeaderMap.cpp @@ -57,6 +57,16 @@ {"include/prfchwintrin.h$", ""}, {"include/rdseedintrin.h$", ""}, {"include/rtmintrin.h$", ""}, + {"include/rv32bintrin-builtins.h$", ""}, + {"include/rv32bintrin-emulation.h$", ""}, + {"include/rv32bintrin.h$", ""}, + {"include/rv64bintrin-asm.h$", ""}, + {"include/rv64bintrin-builtins.h$", ""}, + {"include/rv64bintrin-emulation.h$", ""}, + {"include/rv64bintrin.h$", ""}, + {"include/rvbintrin-asm.h$", ""}, + {"include/rvbintrin-emulation.h$", ""}, + {"include/rvintrin.h$", ""}, {"include/shaintrin.h$", ""}, {"include/smmintrin.h$", ""}, {"include/stdalign.h$", ""}, Index: clang-tools-extra/clangd/index/CanonicalIncludes.cpp =================================================================== --- clang-tools-extra/clangd/index/CanonicalIncludes.cpp +++ clang-tools-extra/clangd/index/CanonicalIncludes.cpp @@ -150,6 +150,16 @@ {"include/prfchwintrin.h", ""}, {"include/rdseedintrin.h", ""}, {"include/rtmintrin.h", ""}, + {"include/rv32bintrin-builtins.h", ""}, + {"include/rv32bintrin-emulation.h", ""}, + {"include/rv32bintrin.h", ""}, + {"include/rv64bintrin-asm.h", ""}, + {"include/rv64bintrin-builtins.h", ""}, + {"include/rv64bintrin-emulation.h", ""}, + {"include/rv64bintrin.h", ""}, + {"include/rvbintrin-asm.h", ""}, + {"include/rvbintrin-emulation.h", ""}, + {"include/rvintrin.h", ""}, {"include/shaintrin.h", ""}, {"include/smmintrin.h", ""}, {"include/stdalign.h", ""}, Index: clang/lib/Basic/Targets/RISCV.h =================================================================== --- clang/lib/Basic/Targets/RISCV.h +++ clang/lib/Basic/Targets/RISCV.h @@ -30,11 +30,12 @@ bool HasF; bool HasD; bool HasC; + bool HasB; public: RISCVTargetInfo(const llvm::Triple &Triple, const TargetOptions &) - : TargetInfo(Triple), HasM(false), HasA(false), HasF(false), - HasD(false), HasC(false) { + : TargetInfo(Triple), HasM(false), HasA(false), HasF(false), HasD(false), + HasC(false), HasB(false) { LongDoubleWidth = 128; LongDoubleAlign = 128; LongDoubleFormat = &llvm::APFloat::IEEEquad(); Index: clang/lib/Basic/Targets/RISCV.cpp =================================================================== --- clang/lib/Basic/Targets/RISCV.cpp +++ clang/lib/Basic/Targets/RISCV.cpp @@ -125,6 +125,10 @@ if (HasC) Builder.defineMacro("__riscv_compressed"); + + if (HasB) { + Builder.defineMacro("__riscv_bitmanip"); + } } /// Return true if has this feature, need to sync with handleTargetFeatures. 
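For context, a small illustration (not part of the patch) of what the new macro buys consumers: the rvintrin.h family below keys off __riscv_bitmanip to pick between the inline-asm and plain-C paths, and user code can test the same macro. The function name here is purely hypothetical.

/* Illustrative sketch only; not part of the patch. */
static inline const char *bitmanip_status(void) {
#if defined(__riscv_bitmanip)
  return "B extension enabled: intrinsics lower to Bitmanip instructions";
#else
  return "B extension disabled: intrinsics fall back to C emulation";
#endif
}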
@@ -139,6 +143,7 @@
       .Case("f", HasF)
       .Case("d", HasD)
       .Case("c", HasC)
+      .Case("b", HasB)
       .Default(false);
 }
@@ -156,6 +161,8 @@
       HasD = true;
     else if (Feature == "+c")
       HasC = true;
+    else if (Feature == "+b")
+      HasB = true;
   }
 
   return true;
Index: clang/lib/Headers/CMakeLists.txt
===================================================================
--- clang/lib/Headers/CMakeLists.txt
+++ clang/lib/Headers/CMakeLists.txt
@@ -86,6 +86,16 @@
   ptwriteintrin.h
   rdseedintrin.h
   rtmintrin.h
+  rv32bintrin-builtins.h
+  rv32bintrin-emulation.h
+  rv32bintrin.h
+  rv64bintrin-asm.h
+  rv64bintrin-builtins.h
+  rv64bintrin-emulation.h
+  rv64bintrin.h
+  rvbintrin-asm.h
+  rvbintrin-emulation.h
+  rvintrin.h
   sgxintrin.h
   s390intrin.h
   shaintrin.h
Index: clang/lib/Headers/intrin.h
===================================================================
--- clang/lib/Headers/intrin.h
+++ clang/lib/Headers/intrin.h
@@ -28,6 +28,10 @@
 #include <arm64intr.h>
 #endif
 
+#if defined(__riscv32__) || defined(__riscv64__)
+#include <rvintrin.h>
+#endif
+
 /* For the definition of jmp_buf. */
 #if __STDC_HOSTED__
 #include <setjmp.h>
Index: clang/lib/Headers/rv32bintrin-builtins.h
===================================================================
--- /dev/null
+++ clang/lib/Headers/rv32bintrin-builtins.h
@@ -0,0 +1,65 @@
+/* ===-------- rv32bintrin-builtins.h --------------------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __RV32BINTRIN_BUILTINS_H
+#define __RV32BINTRIN_BUILTINS_H
+
+#ifndef __RVINTRIN_H
+#error \
+    "Never use <rv32bintrin-builtins.h> directly; include <rvintrin.h> instead."
+#endif
+
+// These are available regardless of whether or not the Bitmanip extension is
+// enabled.
+
+// Our approach to these functions is to use inline asm when the Bitmanip
+// extension is enabled and use replacement C code when it's disabled. However,
+// where LLVM builtins are already available we make use of them.
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_clz(const uint_xlen_t rs1) {
+  // Calling these builtins with 0 results in undefined behaviour.
+  if (rs1 == 0) {
+    return XLEN;
+  }
+  return __builtin_clz(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_ctz(const uint_xlen_t rs1) {
+  if (rs1 == 0) {
+    return XLEN;
+  }
+  return __builtin_ctz(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_pcnt(const uint_xlen_t rs1) {
+  return __builtin_popcount(rs1);
+}
+
+// Generic aliases
+// e.g. _rv_* is an alias of _rv32_*
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_clz(const uint_xlen_t rs1) {
+  return _rv32_clz(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_ctz(const uint_xlen_t rs1) {
+  return _rv32_ctz(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_pcnt(const uint_xlen_t rs1) {
+  return _rv32_pcnt(rs1);
+}
+
+#endif //__RV32BINTRIN_BUILTINS_H
Index: clang/lib/Headers/rv32bintrin-emulation.h
===================================================================
--- /dev/null
+++ clang/lib/Headers/rv32bintrin-emulation.h
@@ -0,0 +1,106 @@
+/* ===-------- rv32bintrin-emulation.h -------------------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __RV32BINTRIN_EMULATION_H +#define __RV32BINTRIN_EMULATION_H + +#ifndef __RVINTRIN_H +#error \ + "Never use directly; include instead." +#endif + +#include + +#include "rvbintrin-emulation.h" + +// If the builtins are unavailable then these provide the same functionality. + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS _rv_gorc(const uint32_t rs1, + const uint32_t rs2) { + uint32_t x = rs1; + const uint_xlen_t shamt = rs2 & 31; + if (shamt & 1) { + x |= ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1); + } + if (shamt & 2) { + x |= ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2); + } + if (shamt & 4) { + x |= ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4); + } + if (shamt & 8) { + x |= ((x & 0x00FF00FF) << 8) | ((x & 0xFF00FF00) >> 8); + } + if (shamt & 16) { + x |= ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >> 16); + } + return x; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS _rv_grev(const uint32_t rs1, + const uint32_t rs2) { + uint32_t x = rs1; + const uint_xlen_t shamt = rs2 & 31; + if (shamt & 1) { + x = ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1); + } + if (shamt & 2) { + x = ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2); + } + if (shamt & 4) { + x = ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4); + } + if (shamt & 8) { + x = ((x & 0x00FF00FF) << 8) | ((x & 0xFF00FF00) >> 8); + } + if (shamt & 16) { + x = ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >> 16); + } + return x; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS _rv_shfl(const uint32_t rs1, + const uint32_t rs2) { + uint32_t x = rs1; + const uint_xlen_t shamt = rs2 & 15; + if (shamt & 8) { + x = shuffle_stage(x, 0x00ff0000, 0x0000ff00, 8); + } + if (shamt & 4) { + x = shuffle_stage(x, 0x0f000f00, 0x00f000f0, 4); + } + if (shamt & 2) { + x = shuffle_stage(x, 0x30303030, 0x0c0c0c0c, 2); + } + if (shamt & 1) { + x = shuffle_stage(x, 0x44444444, 0x22222222, 1); + } + return x; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unshfl(const uint32_t rs1, const uint32_t rs2) { + uint32_t x = rs1; + const uint_xlen_t shamt = rs2 & 15; + if (shamt & 1) { + x = shuffle_stage(x, 0x44444444, 0x22222222, 1); + } + if (shamt & 2) { + x = shuffle_stage(x, 0x30303030, 0x0c0c0c0c, 2); + } + if (shamt & 4) { + x = shuffle_stage(x, 0x0f000f00, 0x00f000f0, 4); + } + if (shamt & 8) { + x = shuffle_stage(x, 0x00ff0000, 0x0000ff00, 8); + } + return x; +} + +#endif //__RV32BINTRIN_EMULATION_H Index: clang/lib/Headers/rv32bintrin.h =================================================================== --- /dev/null +++ clang/lib/Headers/rv32bintrin.h @@ -0,0 +1,748 @@ +/* ===-------- rv32bintrin.h -----------------------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __RV32BINTRIN_H +#define __RV32BINTRIN_H + +#ifndef __RVINTRIN_H +#error "Never use directly; include instead." +#endif + +#include "rv32bintrin-builtins.h" + +#if defined(__riscv_bitmanip) + +#include "rvbintrin-asm.h" + +#else // Bitmanip extension is disabled. 
+
+#include "rv32bintrin-emulation.h"
+
+#endif // defined(__riscv_bitmanip)
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_andn(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_andn(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_fsl(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) {
+  return _rv_fsl(rs1, rs2, rs3);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_fsr(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) {
+  return _rv_fsr(rs1, rs2, rs3);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_max(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_max(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_maxu(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_maxu(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_min(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_min(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_minu(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_minu(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_orn(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_orn(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_rol(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_rol(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_ror(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_ror(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_bdep(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_bdep(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_bext(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_bext(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_bfp(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_bfp(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_clmul(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_clmul(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_clmulh(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_clmulh(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_clmulr(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_clmulr(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS _rv32_cmix(
+    const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) {
+  return _rv_cmix(rs1, rs2, rs3);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS _rv32_cmov(
+    const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) {
+  return _rv_cmov(rs1, rs2, rs3);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_crc32b(const uint_xlen_t rs1) {
+  return _rv_crc32b(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_crc32h(const uint_xlen_t rs1) {
+  return _rv_crc32h(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_crc32w(const uint_xlen_t rs1) {
+  return _rv_crc32w(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_crc32cb(const uint_xlen_t rs1) {
+  return _rv_crc32cb(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_crc32ch(const uint_xlen_t rs1) {
+  return _rv_crc32ch(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv32_crc32cw(const uint_xlen_t rs1) {
+  return _rv_crc32cw(rs1);
+}
+
+static __inline__ uint_xlen_t
__DEFAULT_FN_ATTRS +_rv32_gorc(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_gorc(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_grev(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_grev(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_pack(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_pack(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_packu(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_packu(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_packh(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_packh(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_sextb(const uint_xlen_t rs1) { + return _rv_sextb(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_sexth(const uint_xlen_t rs1) { + return _rv_sexth(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_sbclr(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_sbclr(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_sbext(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_sbext(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_sbinv(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_sbinv(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_sbset(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_sbset(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_shfl(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_shfl(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_slo(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_slo(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_sro(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_sro(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_unshfl(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_unshfl(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_xnor(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_xnor(rs1, rs2); +} + +// Pseudo Instructions + +// REV (GREVI) pseudo instructions + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_revp(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b00001); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev2n(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b00010); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_revn(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b00011); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev4b(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b00100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev2b(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b00110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_revb(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b00111); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev8h(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b01000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev4h(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b01100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev2h(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b01110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_revh(const uint_xlen_t rs1) { + return 
_rv32_grev(rs1, 0b01111); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev16(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b10000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev8(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b11000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev4(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b11100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev2(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b11110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_rev(const uint_xlen_t rs1) { + return _rv32_grev(rs1, 0b11111); +} + +// ORC (GORCI) pseudo instructions + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orcp(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b00001); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc2n(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b00010); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orcn(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b00011); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc4b(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b00100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc2b(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b00110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orcb(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b00111); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc8h(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b01000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc4h(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b01100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc2h(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b01110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orch(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b01111); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc16(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b10000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc8(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b11000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc4(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b11100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc2(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b11110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_orc(const uint_xlen_t rs1) { + return _rv32_gorc(rs1, 0b11111); +} + +// ZIP (SHFLI) pseudo instructions + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_zipn(const uint_xlen_t rs1) { + return _rv32_shfl(rs1, 0b0001); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_zip2b(const uint_xlen_t rs1) { + return _rv32_shfl(rs1, 0b0010); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_zipb(const uint_xlen_t rs1) { + return _rv32_shfl(rs1, 0b0011); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_zip4h(const uint_xlen_t rs1) { + return _rv32_shfl(rs1, 0b0100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_zip2h(const uint_xlen_t rs1) { + return _rv32_shfl(rs1, 0b0110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_ziph(const uint_xlen_t rs1) { + return _rv32_shfl(rs1, 0b0111); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_zip8(const uint_xlen_t rs1) { + return _rv32_shfl(rs1, 0b1000); +} + +static 
__inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_zip4(const uint_xlen_t rs1) { + return _rv32_shfl(rs1, 0b1100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_zip2(const uint_xlen_t rs1) { + return _rv32_shfl(rs1, 0b1110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_zip(const uint_xlen_t rs1) { + return _rv32_shfl(rs1, 0b1111); +} + +// UNZIP (UNSHFL) pseudo instructions + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_unzipn(const uint_xlen_t rs1) { + return _rv32_unshfl(rs1, 0b0001); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_unzip2b(const uint_xlen_t rs1) { + return _rv32_unshfl(rs1, 0b0010); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_unzipb(const uint_xlen_t rs1) { + return _rv32_unshfl(rs1, 0b0011); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_unzip4h(const uint_xlen_t rs1) { + return _rv32_unshfl(rs1, 0b0100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_unzip2h(const uint_xlen_t rs1) { + return _rv32_unshfl(rs1, 0b0110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_unziph(const uint_xlen_t rs1) { + return _rv32_unshfl(rs1, 0b0111); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_unzip8(const uint_xlen_t rs1) { + return _rv32_unshfl(rs1, 0b1000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_unzip4(const uint_xlen_t rs1) { + return _rv32_unshfl(rs1, 0b1100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_unzip2(const uint_xlen_t rs1) { + return _rv32_unshfl(rs1, 0b1110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv32_unzip(const uint_xlen_t rs1) { + return _rv32_unshfl(rs1, 0b1111); +} + +// Genric aliases for 32 bit pseudo instructions +// e.g. _rv_* is an alias of _rv32_* + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_revp(const uint_xlen_t rs1) { + return _rv32_revp(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev2n(const uint_xlen_t rs1) { + return _rv32_rev2n(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_revn(const uint_xlen_t rs1) { + return _rv32_revn(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev4b(const uint_xlen_t rs1) { + return _rv32_rev4b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev2b(const uint_xlen_t rs1) { + return _rv32_rev2b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_revb(const uint_xlen_t rs1) { + return _rv32_revb(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev8h(const uint_xlen_t rs1) { + return _rv32_rev8h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev4h(const uint_xlen_t rs1) { + return _rv32_rev4h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev2h(const uint_xlen_t rs1) { + return _rv32_rev2h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_revh(const uint_xlen_t rs1) { + return _rv32_revh(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev16(const uint_xlen_t rs1) { + return _rv32_rev16(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev8(const uint_xlen_t rs1) { + return _rv32_rev8(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev4(const uint_xlen_t rs1) { + return _rv32_rev4(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev2(const uint_xlen_t rs1) { + return _rv32_rev2(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev(const uint_xlen_t rs1) { + return _rv32_rev(rs1); +} + 
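As a usage sketch (illustrative only, not part of the patch; it assumes an rv32 target and that the intrinsics above are reached through the rvintrin.h umbrella header this patch adds): orc.b maps every nonzero byte to 0xFF and every zero byte to 0x00, which gives a branch-free "does this word contain a NUL byte?" test, and rev8 is a full byte swap on rv32.

/* Illustrative usage only; not part of the patch. */
#include <rvintrin.h>
#include <stdint.h>

static int word_has_zero_byte(uint32_t w) {
  /* _rv32_orcb(w) == 0xFFFFFFFF iff every byte of w is nonzero. */
  return _rv32_orcb(w) != 0xFFFFFFFFu;
}

static uint32_t byte_swap(uint32_t w) {
  /* grev with shamt 0b11000 (24) reverses the byte order of a 32-bit word. */
  return _rv32_rev8(w);
}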
+// ORC (GORCI) pseudo instructions + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orcp(const uint_xlen_t rs1) { + return _rv32_orcp(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc2n(const uint_xlen_t rs1) { + return _rv32_orc2n(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orcn(const uint_xlen_t rs1) { + return _rv32_orcn(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc4b(const uint_xlen_t rs1) { + return _rv32_orc4b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc2b(const uint_xlen_t rs1) { + return _rv32_orc2b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orcb(const uint_xlen_t rs1) { + return _rv32_orcb(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc8h(const uint_xlen_t rs1) { + return _rv32_orc8h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc4h(const uint_xlen_t rs1) { + return _rv32_orc4h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc2h(const uint_xlen_t rs1) { + return _rv32_orc2h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orch(const uint_xlen_t rs1) { + return _rv32_orch(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc16(const uint_xlen_t rs1) { + return _rv32_orc16(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc8(const uint_xlen_t rs1) { + return _rv32_orc8(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc4(const uint_xlen_t rs1) { + return _rv32_orc4(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc2(const uint_xlen_t rs1) { + return _rv32_orc2(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc(const uint_xlen_t rs1) { + return _rv32_orc(rs1); +} + +// ZIP (SHFLI) pseudo instructions + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zipn(const uint_xlen_t rs1) { + return _rv32_zipn(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip2b(const uint_xlen_t rs1) { + return _rv32_zip2b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zipb(const uint_xlen_t rs1) { + return _rv32_zipb(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip4h(const uint_xlen_t rs1) { + return _rv32_zip4h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip2h(const uint_xlen_t rs1) { + return _rv32_zip2h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_ziph(const uint_xlen_t rs1) { + return _rv32_ziph(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip8(const uint_xlen_t rs1) { + return _rv32_zip8(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip4(const uint_xlen_t rs1) { + return _rv32_zip4(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip2(const uint_xlen_t rs1) { + return _rv32_zip2(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip(const uint_xlen_t rs1) { + return _rv32_zip(rs1); +} + +// UNZIP (UNSHFL) pseudo instructions + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzipn(const uint_xlen_t rs1) { + return _rv32_unzipn(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip2b(const uint_xlen_t rs1) { + return _rv32_unzip2b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzipb(const uint_xlen_t rs1) { + return _rv32_unzipb(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip4h(const uint_xlen_t rs1) { + return _rv32_unzip4h(rs1); +} + +static __inline__ uint_xlen_t 
__DEFAULT_FN_ATTRS +_rv_unzip2h(const uint_xlen_t rs1) { + return _rv32_unzip2h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unziph(const uint_xlen_t rs1) { + return _rv32_unziph(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip8(const uint_xlen_t rs1) { + return _rv32_unzip8(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip4(const uint_xlen_t rs1) { + return _rv32_unzip4(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip2(const uint_xlen_t rs1) { + return _rv32_unzip2(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip(const uint_xlen_t rs1) { + return _rv32_unzip(rs1); +} + +#endif // __RV32BINTRIN_H Index: clang/lib/Headers/rv64bintrin-asm.h =================================================================== --- /dev/null +++ clang/lib/Headers/rv64bintrin-asm.h @@ -0,0 +1,284 @@ +/* ===-------- rv64bintrin-asm.h -------------------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __RV64BINTRIN_ASM_H +#define __RV64BINTRIN_ASM_H + +#ifndef __RVINTRIN_H +#error "Never use directly; include instead." +#endif + +#include + +#include "rvbintrin-asm.h" + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_fslw(const uint32_t rs1, + const uint32_t rs2, + const uint32_t rs3) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("fsriw %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs2), "i"(rs3)); + } else { + __asm__("fslw %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs2), "r"(rs3)); + } + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_fsrw(const uint32_t rs1, + const uint32_t rs2, + const uint32_t rs3) { + uint_xlen_t rd; + __asm__("fsrw %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs2), "r"(rs3)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_rolw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("rolw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_rorw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("roriw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("rorw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_addwu(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("addiwu %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("addwu %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_adduw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("adduw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_bdepw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("bdepw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_bfpw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("bfpw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_bextw(const uint32_t rs1, 
+ const uint32_t rs2) { + uint_xlen_t rd; + __asm__("bextw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_bmatflip(const uint32_t rs1) { + uint_xlen_t rd; + __asm__("bmatflip %0, %1" : "=r"(rd) : "r"(rs1)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_bmator(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("bmator %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_bmatxor(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("bmatxor %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_clmulhw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("clmulhw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_clmulrw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("clmulrw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_clmulw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("clmulw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_crc32d(const uint32_t rs1) { + uint_xlen_t rd; + __asm__("crc32d %0, %1" : "=r"(rd) : "r"(rs1)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_crc32cd(const uint32_t rs1) { + uint_xlen_t rd; + __asm__("crc32cd %0, %1" : "=r"(rd) : "r"(rs1)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_gorcw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("gorciw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("gorcw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_grevw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("greviw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("grevw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_packw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("packw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_packuw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("packuw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_sbclrw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("sbclriw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("sbclrw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_sbextw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + __asm__("sbextw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_sbinvw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("sbinviw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("sbinvw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint32_t 
__DEFAULT_FN_ATTRS _rv_sbsetw(const uint32_t rs1,
+                                                         const uint32_t rs2) {
+  uint_xlen_t rd;
+  if (__builtin_constant_p(rs2)) {
+    __asm__("sbsetiw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2));
+  } else {
+    __asm__("sbsetw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2));
+  }
+  return rd;
+}
+
+static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_shflw(const uint32_t rs1,
+                                                        const uint32_t rs2) {
+  uint_xlen_t rd;
+  __asm__("shflw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2));
+  return rd;
+}
+
+static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_slliuw(const uint32_t rs1,
+                                                         const uint32_t rs2) {
+  uint_xlen_t rd;
+  __asm__("slliuw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2));
+  return rd;
+}
+
+static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_slow(const uint32_t rs1,
+                                                       const uint32_t rs2) {
+  uint_xlen_t rd;
+  if (__builtin_constant_p(rs2)) {
+    __asm__("sloiw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2));
+  } else {
+    __asm__("slow %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2));
+  }
+  return rd;
+}
+
+static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_srow(const uint32_t rs1,
+                                                       const uint32_t rs2) {
+  uint_xlen_t rd;
+  if (__builtin_constant_p(rs2)) {
+    __asm__("sroiw %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2));
+  } else {
+    __asm__("srow %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2));
+  }
+  return rd;
+}
+
+static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_subuw(const uint32_t rs1,
+                                                        const uint32_t rs2) {
+  uint_xlen_t rd;
+  __asm__("subuw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2));
+  return rd;
+}
+
+static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_subwu(const uint32_t rs1,
+                                                        const uint32_t rs2) {
+  uint_xlen_t rd;
+  __asm__("subwu %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2));
+  return rd;
+}
+
+static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_unshflw(const uint32_t rs1,
+                                                          const uint32_t rs2) {
+  uint_xlen_t rd;
+  __asm__("unshflw %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2));
+  return rd;
+}
+
+#endif //__RV64BINTRIN_ASM_H
Index: clang/lib/Headers/rv64bintrin-builtins.h
===================================================================
--- /dev/null
+++ clang/lib/Headers/rv64bintrin-builtins.h
@@ -0,0 +1,96 @@
+/* ===-------- rv64bintrin-builtins.h --------------------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __RV64BINTRIN_BUILTINS_H
+#define __RV64BINTRIN_BUILTINS_H
+
+#ifndef __RVINTRIN_H
+#error \
+    "Never use <rv64bintrin-builtins.h> directly; include <rvintrin.h> instead."
+#endif
+
+#include <stdint.h>
+
+// These are available regardless of whether or not the Bitmanip extension is
+// enabled.
+
+// Our approach to these functions is to use inline asm when the Bitmanip
+// extension is enabled and use replacement C code when it's disabled. However,
+// where LLVM builtins are already available we make use of them.
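A small sketch of the contract described in that comment (illustrative only, not part of the patch; assumes an rv64 target, where uint_xlen_t is 64 bits wide and XLEN is 64, both coming from rvintrin.h): unlike the raw __builtin_clzll/__builtin_ctzll, the wrappers defined below are well defined for a zero input and return XLEN.

/* Illustrative only; not part of the patch. */
#include <rvintrin.h>

static void clz_ctz_zero_contract(void) {
  uint_xlen_t a = _rv64_clz(0); /* 64 (XLEN), not undefined behaviour */
  uint_xlen_t b = _rv64_ctz(0); /* 64 (XLEN) */
  uint_xlen_t c = _rv64_clz(1); /* 63: delegates to __builtin_clzll */
  (void)a;
  (void)b;
  (void)c;
}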
+ +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_clz(const uint_xlen_t rs1) { + if (rs1 == 0) { + return XLEN; + } + return __builtin_clzll(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_ctz(const uint_xlen_t rs1) { + if (rs1 == 0) { + return XLEN; + } + return __builtin_ctzll(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_pcnt(const uint_xlen_t rs1) { + return __builtin_popcountll(rs1); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_clzw(const uint32_t rs1) { + if (rs1 == 0) { + return XLEN; + } + return __builtin_clz(rs1); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_ctzw(const uint32_t rs1) { + if (rs1 == 0) { + return XLEN; + } + return __builtin_ctz(rs1); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_pcntw(const uint32_t rs1) { + return __builtin_popcount(rs1); +} + +// Genric aliases +// e.g. _rv_* is an alias of _rv64_* + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_clz(const uint_xlen_t rs1) { + return _rv64_clz(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_ctz(const uint_xlen_t rs1) { + return _rv64_ctz(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_pcnt(const uint_xlen_t rs1) { + return _rv64_pcnt(rs1); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_clzw(const uint32_t rs1) { + return _rv64_clzw(rs1); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_ctzw(const uint32_t rs1) { + return _rv64_ctzw(rs1); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_pcntw(const uint32_t rs1) { + return _rv64_pcntw(rs1); +} + +#endif //__RV64BINTRIN_BUILTINS_H Index: clang/lib/Headers/rv64bintrin-emulation.h =================================================================== --- /dev/null +++ clang/lib/Headers/rv64bintrin-emulation.h @@ -0,0 +1,438 @@ +/* ===-------- rv64bintrin-emulation.h -------------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __RV64INTRIN_EMULATION_H +#define __RV64INTRIN_EMULATION_H + +#ifndef __RVINTRIN_H +#error \ + "Never use directly; include instead." 
+#endif + +#include + +#include "rvbintrin-emulation.h" + +static __inline__ uint32_t shuffle32_stage(const uint32_t src, + const uint32_t maskL, + const uint32_t maskR, + const uint_xlen_t N) { + uint32_t x = src & ~(maskL | maskR); + x |= ((src << N) & maskL) | ((src >> N) & maskR); + return x; +} + +static __inline__ uint64_t __DEFAULT_FN_ATTRS _rv_shfl(const uint64_t rs1, + const uint64_t rs2) { + uint64_t x = rs1; + const uint_xlen_t shamt = rs2 & 31; + if (shamt & 16) { + x = shuffle_stage(x, 0x0000ffff00000000LL, 0x00000000ffff0000LL, 16); + } + if (shamt & 8) { + x = shuffle_stage(x, 0x00ff000000ff0000LL, 0x0000ff000000ff00LL, 8); + } + if (shamt & 4) { + x = shuffle_stage(x, 0x0f000f000f000f00LL, 0x00f000f000f000f0LL, 4); + } + if (shamt & 2) { + x = shuffle_stage(x, 0x3030303030303030LL, 0x0c0c0c0c0c0c0c0cLL, 2); + } + if (shamt & 1) { + x = shuffle_stage(x, 0x4444444444444444LL, 0x2222222222222222LL, 1); + } + + return x; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_shflw(const uint32_t rs1, + const uint32_t rs2) { + uint32_t x = rs1; + const uint_xlen_t shamt = rs2 & 15; + if (shamt & 8) { + x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8); + } + if (shamt & 4) { + x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4); + } + if (shamt & 2) { + x = shuffle32_stage(x, 0x30303030, 0x0c0c0c0c, 2); + } + if (shamt & 1) { + x = shuffle32_stage(x, 0x44444444, 0x22222222, 1); + } + return x; +} + +static __inline__ uint64_t __DEFAULT_FN_ATTRS _rv_bmatflip(const uint64_t rs1) { + uint64_t x = rs1; + x = _rv_shfl(x, 31); + x = _rv_shfl(x, 31); + x = _rv_shfl(x, 31); + return x; +} + +static __inline__ uint64_t __DEFAULT_FN_ATTRS _rv_bmator(const uint64_t rs1, + const uint64_t rs2) { + // transpose of rs2 + const uint64_t rs2t = _rv_bmatflip(rs2); + uint8_t u[8]; // rows of rs1 + uint8_t v[8]; // cols of rs2 + for (uint_xlen_t i = 0; i < 8; i++) { + u[i] = rs1 >> (i * 8); + v[i] = rs2t >> (i * 8); + } + uint64_t x = 0; + for (uint_xlen_t i = 0; i < 8; i++) { + if ((u[i / 8] & v[i % 8]) != 0) { + x |= 1LL << i; + } + } + return x; +} + +static __inline__ uint64_t __DEFAULT_FN_ATTRS _rv_bmatxor(const uint64_t rs1, + const uint64_t rs2) { + // transpose of rs2 + const uint64_t rs2t = _rv_bmatflip(rs2); + uint8_t u[8]; // rows of rs1 + uint8_t v[8]; // cols of rs2 + for (uint_xlen_t i = 0; i < 8; i++) { + u[i] = rs1 >> (i * 8); + v[i] = rs2t >> (i * 8); + } + uint64_t x = 0; + for (uint_xlen_t i = 0; i < 64; i++) { + if (_rv_pcnt(u[i / 8] & v[i % 8]) & 1) { + x |= 1LL << i; + } + } + return x; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_fslw(const uint32_t rs1, + const uint32_t rs2, + const uint32_t rs3) { + uint32_t shamt = rs2 & (2 * XLEN - 1); + uint32_t A = rs1; + uint32_t B = rs3; + if (shamt >= XLEN) { + shamt -= XLEN; + A = rs3; + B = rs1; + } + return shamt ? (A << shamt) | (B >> (XLEN - shamt)) : A; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_fsrw(const uint32_t rs1, + const uint32_t rs2, + const uint32_t rs3) { + uint32_t shamt = rs2 & (2 * XLEN - 1); + uint32_t A = rs1; + uint32_t B = rs3; + if (shamt >= XLEN) { + shamt -= XLEN; + A = rs3; + B = rs1; + } + return shamt ? 
(A >> shamt) | (B << (XLEN - shamt)) : A; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_rolw(const uint32_t rs1, + const uint32_t rs2) { + const uint32_t shamt = rs2 & (XLEN - 1); + return (rs1 << shamt) | (rs1 >> ((XLEN - shamt) & (XLEN - 1))); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_rorw(const uint32_t rs1, + const uint32_t rs2) { + const uint32_t shamt = rs2 & (XLEN - 1); + return (rs1 >> shamt) | (rs1 << ((XLEN - shamt) & (XLEN - 1))); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_adduw(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return rs1 + (rs2 & 0xFFFFFFFF); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_addwu(const uint32_t rs1, + const uint32_t rs2) { + return (rs1 + rs2) & 0xFFFFFFFF; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_bdepw(const uint32_t rs1, + const uint32_t rs2) { + uint32_t r = 0; + for (uint_xlen_t i = 0, j = 0; i < XLEN; i++) { + if ((rs2 >> i) & 1) { + if ((rs1 >> j) & 1) { + r |= 1 << i; + } + j++; + } + } + return r; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_bextw(const uint32_t rs1, + const uint32_t rs2) { + uint32_t r = 0; + for (uint_xlen_t i = 0, j = 0; i < XLEN; i++) { + if ((rs2 >> i) & 1) { + if ((rs1 >> i) & 1) { + r |= 1 << j; + } + j++; + } + } + return r; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_bfpw(const uint32_t rs1, + const uint32_t rs2) { + uint_xlen_t len = (rs2 >> 24) & 15; + const uint32_t off = (rs2 >> 16) & (XLEN - 1); + len = len ? len : 16; + const uint32_t mask = _rv_rol(_rv_slo(0, len), off); + const uint32_t data = _rv_rol(rs2, off); + return (data & mask) | (rs1 & ~mask); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_clmulhw(const uint32_t rs1, + const uint32_t rs2) { + uint32_t x = 0; + for (uint_xlen_t i = 1; i < XLEN; i++) { + if ((rs2 >> i) & 1) { + x ^= rs1 >> (XLEN - i); + } + } + return x; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_clmulrw(const uint32_t rs1, + const uint32_t rs2) { + uint32_t x = 0; + for (uint_xlen_t i = 0; i < XLEN; i++) { + if ((rs2 >> i) & 1) { + x ^= rs1 >> (XLEN - i - 1); + } + } + return x; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_clmulw(const uint32_t rs1, + const uint32_t rs2) { + uint32_t x = 0; + for (uint_xlen_t i = 0; i < XLEN; i++) { + if ((rs2 >> i) & 1) { + x ^= rs1 << i; + } + } + return x; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_crc32d(const uint_xlen_t rs1) { + return crc32(rs1, 64); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_crc32cd(const uint_xlen_t rs1) { + return crc32c(rs1, 64); +} + +static __inline__ uint64_t __DEFAULT_FN_ATTRS _rv_gorc(const uint64_t rs1, + const uint64_t rs2) { + uint64_t x = rs1; + const uint_xlen_t shamt = rs2 & 63; + if (shamt & 1) + x |= ((x & 0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1); + if (shamt & 2) + x |= ((x & 0x3333333333333333LL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2); + if (shamt & 4) + x |= ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4); + if (shamt & 8) + x |= ((x & 0x00FF00FF00FF00FFLL) << 8) | ((x & 0xFF00FF00FF00FF00LL) >> 8); + if (shamt & 16) + x |= + ((x & 0x0000FFFF0000FFFFLL) << 16) | ((x & 0xFFFF0000FFFF0000LL) >> 16); + if (shamt & 32) + x |= + ((x & 0x00000000FFFFFFFFLL) << 32) | ((x & 0xFFFFFFFF00000000LL) >> 32); + return x; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_gorcw(const uint32_t rs1, + const uint32_t rs2) { + uint32_t x = rs1; + const uint_xlen_t shamt = rs2 & 31; + if (shamt & 1) { + x |= ((x & 
0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1);
+  }
+  if (shamt & 2) {
+    x |= ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2);
+  }
+  if (shamt & 4) {
+    x |= ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4);
+  }
+  if (shamt & 8) {
+    x |= ((x & 0x00FF00FF) << 8) | ((x & 0xFF00FF00) >> 8);
+  }
+  if (shamt & 16) {
+    x |= ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >> 16);
+  }
+  return x;
+}
+
+static __inline__ uint64_t __DEFAULT_FN_ATTRS _rv_grev(const uint64_t rs1,
+                                                       const uint64_t rs2) {
+  uint64_t x = rs1;
+  const uint_xlen_t shamt = rs2 & 63;
+  if (shamt & 1) {
+    x = ((x & 0x5555555555555555LL) << 1) | ((x & 0xAAAAAAAAAAAAAAAALL) >> 1);
+  }
+  if (shamt & 2) {
+    x = ((x & 0x3333333333333333LL) << 2) | ((x & 0xCCCCCCCCCCCCCCCCLL) >> 2);
+  }
+  if (shamt & 4) {
+    x = ((x & 0x0F0F0F0F0F0F0F0FLL) << 4) | ((x & 0xF0F0F0F0F0F0F0F0LL) >> 4);
+  }
+  if (shamt & 8) {
+    x = ((x & 0x00FF00FF00FF00FFLL) << 8) | ((x & 0xFF00FF00FF00FF00LL) >> 8);
+  }
+  if (shamt & 16) {
+    x = ((x & 0x0000FFFF0000FFFFLL) << 16) | ((x & 0xFFFF0000FFFF0000LL) >> 16);
+  }
+  if (shamt & 32) {
+    x = ((x & 0x00000000FFFFFFFFLL) << 32) | ((x & 0xFFFFFFFF00000000LL) >> 32);
+  }
+  return x;
+}
+
+static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_grevw(const uint32_t rs1,
+                                                        const uint32_t rs2) {
+  uint32_t x = rs1;
+  const uint_xlen_t shamt = rs2 & 31;
+  if (shamt & 1) {
+    x = ((x & 0x55555555) << 1) | ((x & 0xAAAAAAAA) >> 1);
+  }
+  if (shamt & 2) {
+    x = ((x & 0x33333333) << 2) | ((x & 0xCCCCCCCC) >> 2);
+  }
+  if (shamt & 4) {
+    x = ((x & 0x0F0F0F0F) << 4) | ((x & 0xF0F0F0F0) >> 4);
+  }
+  if (shamt & 8) {
+    x = ((x & 0x00FF00FF) << 8) | ((x & 0xFF00FF00) >> 8);
+  }
+  if (shamt & 16) {
+    x = ((x & 0x0000FFFF) << 16) | ((x & 0xFFFF0000) >> 16);
+  }
+  return x;
+}
+
+static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_packw(const uint32_t rs1,
+                                                        const uint32_t rs2) {
+  // XLEN / 4 to get half of a 32 bit value on riscv64
+  return (rs2 << XLEN / 4) | ((rs1 << XLEN / 4) >> XLEN / 4);
+}
+
+static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_packuw(const uint32_t rs1,
+                                                         const uint32_t rs2) {
+  // XLEN / 4 to get half of a 32 bit value on riscv64
+  return (rs1 >> XLEN / 4) | ((rs2 >> XLEN / 4) << XLEN / 4);
+}
+
+static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_sbclrw(const uint32_t rs1,
+                                                         const uint32_t rs2) {
+  return rs1 & ~(1 << (rs2 & (XLEN - 1)));
+}
+
+static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_sbextw(const uint32_t rs1,
+                                                         const uint32_t rs2) {
+  return 1 & (rs1 >> (rs2 & (XLEN - 1)));
+}
+
+static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_sbinvw(const uint32_t rs1,
+                                                         const uint32_t rs2) {
+  return rs1 ^ (1 << (rs2 & (XLEN - 1)));
+}
+
+static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_sbsetw(const uint32_t rs1,
+                                                         const uint32_t rs2) {
+  return rs1 | (1 << (rs2 & (XLEN - 1)));
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_slliuw(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return (rs1 & 0xFFFFFFFF) << (rs2 & (XLEN - 1));
+}
+
+static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_slow(const uint32_t rs1,
+                                                       const uint32_t rs2) {
+  return ~(~rs1 << (rs2 & (XLEN - 1)));
+}
+
+static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_srow(const uint32_t rs1,
+                                                       const uint32_t rs2) {
+  return ~(~rs1 >> (rs2 & (XLEN - 1)));
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_subuw(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return rs1 - (rs2 & 0xFFFFFFFF);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_subwu(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return (rs1 - rs2) & 0xFFFFFFFF;
+}
+
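To make the *uw/*wu naming above concrete, a usage sketch (illustrative only, not part of the patch; assumes an rv64 target and the rvintrin.h umbrella header this patch adds): add.uw zero-extends the 32-bit operand before adding, the usual pattern for indexing with an unsigned 32-bit offset, while addwu keeps only the low 32 bits of the sum.

/* Illustrative only; not part of the patch. */
#include <rvintrin.h>
#include <stdint.h>

static uint64_t element_address(uint64_t base, uint32_t index) {
  /* base + (uint64_t)index; matches the _rv_adduw emulation above. */
  return _rv_adduw(base, index);
}

static uint32_t wrapping_add32(uint32_t a, uint32_t b) {
  /* (a + b) truncated to 32 bits; matches the _rv_addwu emulation above. */
  return (uint32_t)_rv_addwu(a, b);
}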
+static __inline__ uint64_t __DEFAULT_FN_ATTRS _rv_unshfl(const uint64_t rs1, + const uint64_t rs2) { + uint64_t x = rs1; + const uint_xlen_t shamt = rs2 & 31; + if (shamt & 1) { + x = shuffle_stage(x, 0x4444444444444444LL, 0x2222222222222222LL, 1); + } + if (shamt & 2) { + x = shuffle_stage(x, 0x3030303030303030LL, 0x0c0c0c0c0c0c0c0cLL, 2); + } + if (shamt & 4) { + x = shuffle_stage(x, 0x0f000f000f000f00LL, 0x00f000f000f000f0LL, 4); + } + if (shamt & 8) { + x = shuffle_stage(x, 0x00ff000000ff0000LL, 0x0000ff000000ff00LL, 8); + } + if (shamt & 16) { + x = shuffle_stage(x, 0x0000ffff00000000LL, 0x00000000ffff0000LL, 16); + } + return x; +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv_unshflw(const uint32_t rs1, + const uint32_t rs2) { + uint32_t x = rs1; + const uint_xlen_t shamt = rs2 & 15; + if (shamt & 1) { + x = shuffle32_stage(x, 0x44444444, 0x22222222, 1); + } + if (shamt & 2) { + x = shuffle32_stage(x, 0x30303030, 0x0c0c0c0c, 2); + } + if (shamt & 4) { + x = shuffle32_stage(x, 0x0f000f00, 0x00f000f0, 4); + } + if (shamt & 8) { + x = shuffle32_stage(x, 0x00ff0000, 0x0000ff00, 8); + } + return x; +} + +#endif //__RV64INTRIN_EMULATION_H Index: clang/lib/Headers/rv64bintrin.h =================================================================== --- /dev/null +++ clang/lib/Headers/rv64bintrin.h @@ -0,0 +1,1132 @@ +/* ===-------- rv64bintrin.h -----------------------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __RV64BINTRIN_H +#define __RV64BINTRIN_H + +#ifndef __RVINTRIN_H +#error "Never use directly; include instead." +#endif + +#include + +#include "rv64bintrin-builtins.h" + +#if defined(__riscv_bitmanip) + +#include "rv64bintrin-asm.h" + +#else // Bitmanip extension is disabled. 
+
+#include "rv64bintrin-emulation.h"
+
+#endif // defined(__riscv_bitmanip)
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_andn(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_andn(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_fsl(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) {
+  return _rv_fsl(rs1, rs2, rs3);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_fsr(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) {
+  return _rv_fsr(rs1, rs2, rs3);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_max(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_max(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_maxu(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_maxu(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_min(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_min(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_minu(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_minu(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_orn(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_orn(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_rol(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_rol(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_ror(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_ror(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_bdep(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_bdep(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_bext(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_bext(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_bfp(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_bfp(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_clmul(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_clmul(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_clmulh(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_clmulh(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_clmulr(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+  return _rv_clmulr(rs1, rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS _rv64_cmix(
+    const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) {
+  return _rv_cmix(rs1, rs2, rs3);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS _rv64_cmov(
+    const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) {
+  return _rv_cmov(rs1, rs2, rs3);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_crc32b(const uint_xlen_t rs1) {
+  return _rv_crc32b(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_crc32h(const uint_xlen_t rs1) {
+  return _rv_crc32h(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_crc32w(const uint_xlen_t rs1) {
+  return _rv_crc32w(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_crc32cb(const uint_xlen_t rs1) {
+  return _rv_crc32cb(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_crc32ch(const uint_xlen_t rs1) {
+  return _rv_crc32ch(rs1);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_crc32cw(const uint_xlen_t rs1) {
+  return _rv_crc32cw(rs1);
+}
+
+static __inline__ uint_xlen_t
__DEFAULT_FN_ATTRS +_rv64_gorc(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_gorc(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_grev(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_grev(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_pack(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_pack(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_packu(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_packu(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_packh(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_packh(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_sextb(const uint_xlen_t rs1) { + return _rv_sextb(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_sexth(const uint_xlen_t rs1) { + return _rv_sexth(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_sbclr(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_sbclr(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_sbext(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_sbext(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_sbinv(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_sbinv(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_sbset(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_sbset(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_shfl(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_shfl(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_slo(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_slo(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_sro(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_sro(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_unshfl(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_unshfl(rs1, rs2); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_xnor(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return _rv_xnor(rs1, rs2); +} + +// 64 bit only + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_fslw(const uint32_t rs1, + const uint32_t rs2, + const uint32_t rs3) { + return _rv_fslw(rs1, rs2, rs3); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_fsrw(const uint32_t rs1, + const uint32_t rs2, + const uint32_t rs3) { + return _rv_fsrw(rs1, rs2, rs3); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_rolw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_rolw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_rorw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_rorw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_adduw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_adduw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_addwu(const uint32_t rs1, + const uint32_t rs2) { + return _rv_addwu(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_bdepw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_bdepw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_bfpw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_bfpw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_bextw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_bextw(rs1, rs2); +} + +static 
__inline__ uint32_t __DEFAULT_FN_ATTRS +_rv64_bmatflip(const uint32_t rs1) { + return _rv_bmatflip(rs1); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_bmator(const uint32_t rs1, + const uint32_t rs2) { + return _rv_bmator(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS +_rv64_bmatxor(const uint32_t rs1, const uint32_t rs2) { + return _rv_bmatxor(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS +_rv64_clmulhw(const uint32_t rs1, const uint32_t rs2) { + return _rv_clmulhw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS +_rv64_clmulrw(const uint32_t rs1, const uint32_t rs2) { + return _rv_clmulrw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_clmulw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_clmulw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_crc32d(const uint32_t rs1) { + return _rv_crc32d(rs1); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS +_rv64_crc32cd(const uint32_t rs1) { + return _rv_crc32cd(rs1); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_gorcw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_gorcw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_grevw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_grevw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_packw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_packw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_packuw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_packuw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_sbclrw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_sbclrw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_sbextw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_sbextw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_sbinvw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_sbinvw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_sbsetw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_sbsetw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_shflw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_shflw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_slliuw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_slliuw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_slow(const uint32_t rs1, + const uint32_t rs2) { + return _rv_slow(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_srow(const uint32_t rs1, + const uint32_t rs2) { + return _rv_srow(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_subuw(const uint32_t rs1, + const uint32_t rs2) { + return _rv_subuw(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS _rv64_subwu(const uint32_t rs1, + const uint32_t rs2) { + return _rv_subwu(rs1, rs2); +} + +static __inline__ uint32_t __DEFAULT_FN_ATTRS +_rv64_unshflw(const uint32_t rs1, const uint32_t rs2) { + return _rv_unshflw(rs1, rs2); +} + +// Pseudo Instructions + +// REV (GREVI) pseudo instructions + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_revp(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b000001); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev2n(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b000010); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_revn(const uint_xlen_t rs1) { + return 
_rv64_grev(rs1, 0b000011); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev4b(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b000100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev2b(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b000110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_revb(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b000111); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev8h(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b001000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev4h(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b001100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev2h(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b001110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_revh(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b001111); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev16w(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b010000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev8w(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b011000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev4w(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b011100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev2w(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b011110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_revw(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b011111); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev32(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b100000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev16(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b110000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev8(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b111000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev4(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b111100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev2(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b111110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_rev(const uint_xlen_t rs1) { + return _rv64_grev(rs1, 0b111111); +} + +// ORC (GORCI) pseudo instructions + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_orcp(const uint_xlen_t rs1) { + return _rv64_gorc(rs1, 0b000001); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_orc2n(const uint_xlen_t rs1) { + return _rv64_gorc(rs1, 0b000010); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_orcn(const uint_xlen_t rs1) { + return _rv64_gorc(rs1, 0b000011); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_orc4b(const uint_xlen_t rs1) { + return _rv64_gorc(rs1, 0b000100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_orc2b(const uint_xlen_t rs1) { + return _rv64_gorc(rs1, 0b000110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_orcb(const uint_xlen_t rs1) { + return _rv64_gorc(rs1, 0b000111); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_orc8h(const uint_xlen_t rs1) { + return _rv64_gorc(rs1, 0b001000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_orc4h(const uint_xlen_t rs1) { + return _rv64_gorc(rs1, 0b001100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_orc2h(const uint_xlen_t rs1) { + return _rv64_gorc(rs1, 0b001110); +} + +static 
__inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_orch(const uint_xlen_t rs1) { + return _rv64_gorc(rs1, 0b001111); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_orc16w(const uint_xlen_t rs1) { + return _rv64_gorc(rs1, 0b010000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_orc8w(const uint_xlen_t rs1) { + return _rv64_gorc(rs1, 0b011000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_orc4w(const uint_xlen_t rs1) { + return _rv64_gorc(rs1, 0b011100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_orc2w(const uint_xlen_t rs1) { + return _rv64_gorc(rs1, 0b011110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_orcw(const uint_xlen_t rs1) { + return _rv64_gorc(rs1, 0b011111); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_orc32(const uint_xlen_t rs1) { + return _rv64_gorc(rs1, 0b100000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_orc16(const uint_xlen_t rs1) { + return _rv64_gorc(rs1, 0b110000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_orc8(const uint_xlen_t rs1) { + return _rv64_gorc(rs1, 0b111000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_orc4(const uint_xlen_t rs1) { + return _rv64_gorc(rs1, 0b111100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_orc2(const uint_xlen_t rs1) { + return _rv64_gorc(rs1, 0b111110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_orc(const uint_xlen_t rs1) { + return _rv64_gorc(rs1, 0b111111); +} + +// ZIP (SHFLI) pseudo instructions +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_zipn(const uint_xlen_t rs1) { + return _rv64_shfl(rs1, 0b00001); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_zip2b(const uint_xlen_t rs1) { + return _rv64_shfl(rs1, 0b00010); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_zipb(const uint_xlen_t rs1) { + return _rv64_shfl(rs1, 0b00011); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_zip4h(const uint_xlen_t rs1) { + return _rv64_shfl(rs1, 0b00100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_zip2h(const uint_xlen_t rs1) { + return _rv64_shfl(rs1, 0b00110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_ziph(const uint_xlen_t rs1) { + return _rv64_shfl(rs1, 0b00111); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_zip8w(const uint_xlen_t rs1) { + return _rv64_shfl(rs1, 0b01000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_zip4w(const uint_xlen_t rs1) { + return _rv64_shfl(rs1, 0b01100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_zip2w(const uint_xlen_t rs1) { + return _rv64_shfl(rs1, 0b01110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_zipw(const uint_xlen_t rs1) { + return _rv64_shfl(rs1, 0b01111); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_zip16(const uint_xlen_t rs1) { + return _rv64_shfl(rs1, 0b10000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_zip8(const uint_xlen_t rs1) { + return _rv64_shfl(rs1, 0b11000); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_zip4(const uint_xlen_t rs1) { + return _rv64_shfl(rs1, 0b11100); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_zip2(const uint_xlen_t rs1) { + return _rv64_shfl(rs1, 0b11110); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv64_zip(const uint_xlen_t rs1) { + return _rv64_shfl(rs1, 0b11111); +} + +// UNZIP (UNSHFL) pseudo instructions + +static __inline__ uint_xlen_t 
__DEFAULT_FN_ATTRS
+_rv64_unzipn(const uint_xlen_t rs1) {
+ return _rv64_unshfl(rs1, 0b00001);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip2b(const uint_xlen_t rs1) {
+ return _rv64_unshfl(rs1, 0b00010);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzipb(const uint_xlen_t rs1) {
+ return _rv64_unshfl(rs1, 0b00011);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip4h(const uint_xlen_t rs1) {
+ return _rv64_unshfl(rs1, 0b00100);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip2h(const uint_xlen_t rs1) {
+ return _rv64_unshfl(rs1, 0b00110);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unziph(const uint_xlen_t rs1) {
+ return _rv64_unshfl(rs1, 0b00111);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip8w(const uint_xlen_t rs1) {
+ return _rv64_unshfl(rs1, 0b01000);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip4w(const uint_xlen_t rs1) {
+ return _rv64_unshfl(rs1, 0b01100);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip2w(const uint_xlen_t rs1) {
+ return _rv64_unshfl(rs1, 0b01110);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzipw(const uint_xlen_t rs1) {
+ return _rv64_unshfl(rs1, 0b01111);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip16(const uint_xlen_t rs1) {
+ return _rv64_unshfl(rs1, 0b10000);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip8(const uint_xlen_t rs1) {
+ return _rv64_unshfl(rs1, 0b11000);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip4(const uint_xlen_t rs1) {
+ return _rv64_unshfl(rs1, 0b11100);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip2(const uint_xlen_t rs1) {
+ return _rv64_unshfl(rs1, 0b11110);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv64_unzip(const uint_xlen_t rs1) {
+ return _rv64_unshfl(rs1, 0b11111);
+}
+
+// Generic aliases for 64-bit pseudo instructions
+// e.g. 
_rv_* is an alias of _rv64_* + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_revp(const uint_xlen_t rs1) { + return _rv64_revp(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev2n(const uint_xlen_t rs1) { + return _rv64_rev2n(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_revn(const uint_xlen_t rs1) { + return _rv64_revn(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev4b(const uint_xlen_t rs1) { + return _rv64_rev4b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev2b(const uint_xlen_t rs1) { + return _rv64_rev2b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_revb(const uint_xlen_t rs1) { + return _rv64_revb(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev8h(const uint_xlen_t rs1) { + return _rv64_rev8h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev4h(const uint_xlen_t rs1) { + return _rv64_rev4h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev2h(const uint_xlen_t rs1) { + return _rv64_rev2h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_revh(const uint_xlen_t rs1) { + return _rv64_revh(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev16w(const uint_xlen_t rs1) { + return _rv64_rev16w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev8w(const uint_xlen_t rs1) { + return _rv64_rev8w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev4w(const uint_xlen_t rs1) { + return _rv64_rev4w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev2w(const uint_xlen_t rs1) { + return _rv64_rev2w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_revw(const uint_xlen_t rs1) { + return _rv64_revw(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev32(const uint_xlen_t rs1) { + return _rv64_rev32(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev16(const uint_xlen_t rs1) { + return _rv64_rev16(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev8(const uint_xlen_t rs1) { + return _rv64_rev8(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev4(const uint_xlen_t rs1) { + return _rv64_rev4(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev2(const uint_xlen_t rs1) { + return _rv64_rev2(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rev(const uint_xlen_t rs1) { + return _rv64_rev(rs1); +} + +// ORC (GORCI) pseudo instructions + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orcp(const uint_xlen_t rs1) { + return _rv64_orcp(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc2n(const uint_xlen_t rs1) { + return _rv64_orc2n(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orcn(const uint_xlen_t rs1) { + return _rv64_orcn(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc4b(const uint_xlen_t rs1) { + return _rv64_orc4b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc2b(const uint_xlen_t rs1) { + return _rv64_orc2b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orcb(const uint_xlen_t rs1) { + return _rv64_orcb(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc8h(const uint_xlen_t rs1) { + return _rv64_orc8h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc4h(const uint_xlen_t rs1) { + return _rv64_orc4h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc2h(const uint_xlen_t rs1) { + 
return _rv64_orc2h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orch(const uint_xlen_t rs1) { + return _rv64_orch(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc16w(const uint_xlen_t rs1) { + return _rv64_orc16w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc8w(const uint_xlen_t rs1) { + return _rv64_orc8w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc4w(const uint_xlen_t rs1) { + return _rv64_orc4w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc2w(const uint_xlen_t rs1) { + return _rv64_orc2w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orcw(const uint_xlen_t rs1) { + return _rv64_orcw(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc32(const uint_xlen_t rs1) { + return _rv64_orc32(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc16(const uint_xlen_t rs1) { + return _rv64_orc16(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc8(const uint_xlen_t rs1) { + return _rv64_orc8(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc4(const uint_xlen_t rs1) { + return _rv64_orc4(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc2(const uint_xlen_t rs1) { + return _rv64_orc2(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orc(const uint_xlen_t rs1) { + return _rv64_orc(rs1); +} + +// ZIP (SHFLI) pseudo instructions + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zipn(const uint_xlen_t rs1) { + return _rv64_zipn(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip2b(const uint_xlen_t rs1) { + return _rv64_zip2b(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zipb(const uint_xlen_t rs1) { + return _rv64_zipb(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip4h(const uint_xlen_t rs1) { + return _rv64_zip4h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip2h(const uint_xlen_t rs1) { + return _rv64_zip2h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_ziph(const uint_xlen_t rs1) { + return _rv64_ziph(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip8w(const uint_xlen_t rs1) { + return _rv64_zip8w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip4w(const uint_xlen_t rs1) { + return _rv64_zip4w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip2w(const uint_xlen_t rs1) { + return _rv64_zip2w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zipw(const uint_xlen_t rs1) { + return _rv64_zipw(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip16(const uint_xlen_t rs1) { + return _rv64_zip16(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip8(const uint_xlen_t rs1) { + return _rv64_zip8(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip4(const uint_xlen_t rs1) { + return _rv64_zip4(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip2(const uint_xlen_t rs1) { + return _rv64_zip2(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_zip(const uint_xlen_t rs1) { + return _rv64_zip(rs1); +} + +// UNZIP (UNSHFL) pseudo instructions + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzipn(const uint_xlen_t rs1) { + return _rv64_unzipn(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip2b(const uint_xlen_t rs1) { + return _rv64_unzip2b(rs1); +} + +static __inline__ uint_xlen_t 
__DEFAULT_FN_ATTRS +_rv_unzipb(const uint_xlen_t rs1) { + return _rv64_unzipb(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip4h(const uint_xlen_t rs1) { + return _rv64_unzip4h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip2h(const uint_xlen_t rs1) { + return _rv64_unzip2h(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unziph(const uint_xlen_t rs1) { + return _rv64_unziph(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip8w(const uint_xlen_t rs1) { + return _rv64_unzip8w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip4w(const uint_xlen_t rs1) { + return _rv64_unzip4w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip2w(const uint_xlen_t rs1) { + return _rv64_unzip2w(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzipw(const uint_xlen_t rs1) { + return _rv64_unzipw(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip16(const uint_xlen_t rs1) { + return _rv64_unzip16(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip8(const uint_xlen_t rs1) { + return _rv64_unzip8(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip4(const uint_xlen_t rs1) { + return _rv64_unzip4(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip2(const uint_xlen_t rs1) { + return _rv64_unzip2(rs1); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unzip(const uint_xlen_t rs1) { + return _rv64_unzip(rs1); +} + +#endif // __RV64BINTRIN_H Index: clang/lib/Headers/rvbintrin-asm.h =================================================================== --- /dev/null +++ clang/lib/Headers/rvbintrin-asm.h @@ -0,0 +1,343 @@ +/* ===-------- rvbintrin-asm.h ---------------------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __RVBINTRIN_ASM_H +#define __RVBINTRIN_ASM_H + +#ifndef __RVINTRIN_H +#error "Never use directly; include instead." 
+#endif
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_andn(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+ uint_xlen_t rd;
+ __asm__("andn %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2));
+ return rd;
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_fsl(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) {
+ uint_xlen_t rd;
+ __asm__("fsl %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs2), "r"(rs3));
+ return rd;
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_fsr(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) {
+ uint_xlen_t rd;
+ if (__builtin_constant_p(rs2)) {
+ __asm__("fsri %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "i"(rs2), "r"(rs3));
+ } else {
+ __asm__("fsr %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs2), "r"(rs3));
+ }
+ return rd;
+}
+
+static __inline__ int_xlen_t _rv_max(const int_xlen_t rs1,
+ const int_xlen_t rs2) {
+ int_xlen_t rd;
+ __asm__("max %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2));
+ return rd;
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_maxu(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+ uint_xlen_t rd;
+ __asm__("maxu %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2));
+ return rd;
+}
+
+static __inline__ int_xlen_t _rv_min(const int_xlen_t rs1,
+ const int_xlen_t rs2) {
+ int_xlen_t rd;
+ __asm__("min %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2));
+ return rd;
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_minu(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+ uint_xlen_t rd;
+ __asm__("minu %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2));
+ return rd;
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_orn(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+ uint_xlen_t rd;
+ __asm__("orn %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2));
+ return rd;
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_rol(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+ uint_xlen_t rd;
+ __asm__("rol %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2));
+ return rd;
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_ror(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+ uint_xlen_t rd;
+ if (__builtin_constant_p(rs2)) {
+ __asm__("rori %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2));
+ } else {
+ __asm__("ror %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2));
+ }
+ return rd;
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_bdep(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+ uint_xlen_t rd;
+ __asm__("bdep %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2));
+ return rd;
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_bext(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+ uint_xlen_t rd;
+ __asm__("bext %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2));
+ return rd;
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_bfp(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+ uint_xlen_t rd;
+ __asm__("bfp %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2));
+ return rd;
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_clmul(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+ uint_xlen_t rd;
+ __asm__("clmul %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2));
+ return rd;
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_clmulh(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+ uint_xlen_t rd;
+ __asm__("clmulh %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2));
+ return rd;
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_clmulr(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+ uint_xlen_t rd;
+ __asm__("clmulr %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2));
+ 
return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_cmix(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) { + uint_xlen_t rd; + __asm__("cmix %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs2), "r"(rs3)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_cmov(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) { + uint_xlen_t rd; + __asm__("cmov %0, %1, %2, %3" : "=r"(rd) : "r"(rs1), "r"(rs2), "r"(rs3)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_crc32b(const uint_xlen_t rs1) { + uint_xlen_t rd; + __asm__("crc32b %0, %1" : "=r"(rd) : "r"(rs1)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_crc32h(const uint_xlen_t rs1) { + uint_xlen_t rd; + __asm__("crc32h %0, %1" : "=r"(rd) : "r"(rs1)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_crc32w(const uint_xlen_t rs1) { + uint_xlen_t rd; + __asm__("crc32w %0, %1" : "=r"(rd) : "r"(rs1)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_crc32cb(const uint_xlen_t rs1) { + uint_xlen_t rd; + __asm__("crc32cb %0, %1" : "=r"(rd) : "r"(rs1)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_crc32ch(const uint_xlen_t rs1) { + uint_xlen_t rd; + __asm__("crc32ch %0, %1" : "=r"(rd) : "r"(rs1)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_crc32cw(const uint_xlen_t rs1) { + uint_xlen_t rd; + __asm__("crc32cw %0, %1" : "=r"(rd) : "r"(rs1)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_gorc(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("gorci %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("gorc %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_grev(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("grevi %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("grev %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_pack(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + __asm__("pack %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_packu(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + __asm__("packu %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_packh(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + __asm__("packh %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_sextb(const uint_xlen_t rs1) { + uint_xlen_t rd; + __asm__("sext.b %0, %1" : "=r"(rd) : "r"(rs1)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_sexth(const uint_xlen_t rs1) { + uint_xlen_t rd; + __asm__("sext.h %0, %1" : "=r"(rd) : "r"(rs1)); + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_sbclr(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("sbclri %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("sbclr %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_sbext(const uint_xlen_t 
rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("sbexti %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("sbext %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_sbinv(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("sbinvi %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("sbinv %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_sbset(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("sbseti %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("sbset %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_shfl(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("shfli %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("shfl %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_slo(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("sloi %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("slo %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_sro(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("sroi %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("sro %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_unshfl(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t rd; + if (__builtin_constant_p(rs2)) { + __asm__("unshfli %0, %1, %2" : "=r"(rd) : "r"(rs1), "i"(rs2)); + } else { + __asm__("unshfl %0, %1, %2" : "=r"(rd) : "r"(rs1), "r"(rs2)); + } + return rd; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_xnor(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return rs1 ^ ~rs2; +} + +#endif // __RVBINTRIN_ASM_H Index: clang/lib/Headers/rvbintrin-emulation.h =================================================================== --- /dev/null +++ clang/lib/Headers/rvbintrin-emulation.h @@ -0,0 +1,285 @@ +/* ===-------- rvbintrin-emulation.h ---------------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __RVBINTRIN_EMULATION_H +#define __RVBINTRIN_EMULATION_H + +#ifndef __RVINTRIN_H +#error \ + "Never use directly; include instead." +#endif + +#include + +// If the builtins are unavailable then these provide the same functionality. 
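+// For example, with XLEN == 32 the rotate emulations below give
+// _rv_rol(0x80000001, 1) == 0x00000003 and _rv_ror(0x80000001, 1) == 0xC0000000,
+// which is exactly what rol/ror produce for a rotate by one.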
+ +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_slo(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return ~(~rs1 << (rs2 & (XLEN - 1))); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_andn(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return rs1 & ~rs2; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_fsl(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) { + uint_xlen_t shamt = rs2 & (2 * XLEN - 1); + uint_xlen_t A = rs1; + uint_xlen_t B = rs3; + if (shamt >= XLEN) { + shamt -= XLEN; + A = rs3; + B = rs1; + } + return shamt ? (A << shamt) | (B >> (XLEN - shamt)) : A; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_fsr(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) { + uint_xlen_t shamt = rs2 & (2 * XLEN - 1); + uint_xlen_t A = rs1; + uint_xlen_t B = rs3; + if (shamt >= XLEN) { + shamt -= XLEN; + A = rs3; + B = rs1; + } + return shamt ? (A >> shamt) | (B << (XLEN - shamt)) : A; +} + +static __inline__ int_xlen_t _rv_max(const int_xlen_t rs1, + const int_xlen_t rs2) { + return rs1 > rs2 ? rs1 : rs2; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_maxu(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return rs1 > rs2 ? rs1 : rs2; +} + +static __inline__ int_xlen_t _rv_min(const int_xlen_t rs1, + const int_xlen_t rs2) { + return rs1 < rs2 ? rs1 : rs2; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_minu(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return rs1 < rs2 ? rs1 : rs2; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_orn(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return rs1 | ~rs2; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_rol(const uint_xlen_t rs1, const uint_xlen_t rs2) { + const uint_xlen_t shamt = rs2 & (XLEN - 1); + return (rs1 << shamt) | (rs1 >> ((XLEN - shamt) & (XLEN - 1))); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_ror(const uint_xlen_t rs1, const uint_xlen_t rs2) { + const uint_xlen_t shamt = rs2 & (XLEN - 1); + return (rs1 >> shamt) | (rs1 << ((XLEN - shamt) & (XLEN - 1))); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_bdep(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t r = 0; + for (uint_xlen_t i = 0, j = 0; i < XLEN; i++) { + if ((rs2 >> i) & 1) { + if ((rs1 >> j) & 1) { + r |= 1 << i; + } + j++; + } + } + return r; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_bext(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t r = 0; + for (uint_xlen_t i = 0, j = 0; i < XLEN; i++) { + if ((rs2 >> i) & 1) { + if ((rs1 >> i) & 1) { + r |= 1 << j; + } + j++; + } + } + return r; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_bfp(const uint_xlen_t rs1, const uint_xlen_t rs2) { + uint_xlen_t len = (rs2 >> 24) & 15; + const uint_xlen_t off = (rs2 >> 16) & (XLEN - 1); + len = len ? 
len : 16;
+ const uint_xlen_t mask = _rv_rol(_rv_slo(0, len), off);
+ const uint_xlen_t data = _rv_rol(rs2, off);
+ return (data & mask) | (rs1 & ~mask);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_clmul(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+ uint_xlen_t x = 0;
+ for (uint_xlen_t i = 0; i < XLEN; i++) {
+ if ((rs2 >> i) & 1) {
+ x ^= rs1 << i;
+ }
+ }
+ return x;
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_clmulh(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+ uint_xlen_t x = 0;
+ for (uint_xlen_t i = 1; i < XLEN; i++) {
+ if ((rs2 >> i) & 1) {
+ x ^= rs1 >> (XLEN - i);
+ }
+ }
+ return x;
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_clmulr(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+ uint_xlen_t x = 0;
+ for (uint_xlen_t i = 0; i < XLEN; i++) {
+ if ((rs2 >> i) & 1) {
+ x ^= rs1 >> (XLEN - i - 1);
+ }
+ }
+ return x;
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_cmix(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) {
+ return (rs1 & rs2) | (rs3 & ~rs2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_cmov(const uint_xlen_t rs1, const uint_xlen_t rs2, const uint_xlen_t rs3) {
+ return rs2 ? rs1 : rs3;
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+crc32(uint_xlen_t x, const uint_xlen_t nbits) {
+ for (uint_xlen_t i = 0; i < nbits; i++) {
+ x = (x >> 1) ^ (0xEDB88320 & ~((x & 1) - 1));
+ }
+ return x;
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+crc32c(uint_xlen_t x, const uint_xlen_t nbits) {
+ for (uint_xlen_t i = 0; i < nbits; i++) {
+ x = (x >> 1) ^ (0x82F63B78 & ~((x & 1) - 1));
+ }
+ return x;
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_crc32b(const uint_xlen_t rs1) {
+ return crc32(rs1, 8);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_crc32h(const uint_xlen_t rs1) {
+ return crc32(rs1, 16);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_crc32w(const uint_xlen_t rs1) {
+ return crc32(rs1, 32);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_crc32cb(const uint_xlen_t rs1) {
+ return crc32c(rs1, 8);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_crc32ch(const uint_xlen_t rs1) {
+ return crc32c(rs1, 16);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_crc32cw(const uint_xlen_t rs1) {
+ return crc32c(rs1, 32);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_pack(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+ return (rs2 << XLEN / 2) | ((rs1 << XLEN / 2) >> XLEN / 2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_packu(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+ return (rs1 >> XLEN / 2) | ((rs2 >> XLEN / 2) << XLEN / 2);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_packh(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+ return (rs1 & 255) | ((rs2 & 255) << 8);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_sextb(const uint_xlen_t rs1) {
+ return ((int_xlen_t)(rs1 << (XLEN - 8))) >> (XLEN - 8);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_sexth(const uint_xlen_t rs1) {
+ return ((int_xlen_t)(rs1 << (XLEN - 16))) >> (XLEN - 16);
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_sbclr(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+ return rs1 & ~(1 << (rs2 & (XLEN - 1)));
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_sbext(const uint_xlen_t rs1, const uint_xlen_t rs2) {
+ return 1 & (rs1 >> (rs2 & (XLEN - 1)));
+}
+
+static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS
+_rv_sbinv(const uint_xlen_t 
rs1, const uint_xlen_t rs2) { + return rs1 ^ (1 << (rs2 & (XLEN - 1))); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_sbset(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return rs1 | (1 << (rs2 & (XLEN - 1))); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +shuffle_stage(const uint_xlen_t src, const uint_xlen_t maskL, + const uint_xlen_t maskR, const uint_xlen_t N) { + uint_xlen_t x = src & ~(maskL | maskR); + x |= ((src << N) & maskL) | ((src >> N) & maskR); + return x; +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_sro(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return ~(~rs1 >> (rs2 & (XLEN - 1))); +} + +static __inline__ uint_xlen_t __DEFAULT_FN_ATTRS +_rv_xnor(const uint_xlen_t rs1, const uint_xlen_t rs2) { + return rs1 ^ ~rs2; +} + +#endif //__RVBINTRIN_EMULATION_H Index: clang/lib/Headers/rvintrin.h =================================================================== --- /dev/null +++ clang/lib/Headers/rvintrin.h @@ -0,0 +1,39 @@ +/* ===-------- rvintrin.h --------------------------------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __RVINTRIN_H +#define __RVINTRIN_H + +// Long is 32 bit on riscv32 and 64 bit on riscv64 +#define int_xlen_t long +#define uint_xlen_t unsigned int_xlen_t +#define XLEN __riscv_xlen + +_Static_assert(__riscv_xlen == sizeof(uint_xlen_t) * 8, + "uint_xlen_t is not __riscv_xlen bits long"); + +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __artificial__, __nodebug__)) + +#if defined(__riscv32__) + +#include "rv32bintrin.h" + +#elif defined(__riscv64__) + +#include "rv64bintrin.h" + +#endif // defined(__riscv64__) + +#undef __DEFAULT_FN_ATTRS + +#undef XLEN +#undef uint_xlen_t +#undef int_xlen_t +#endif // __RVINTRIN_H Index: clang/test/Headers/rvintrin.c =================================================================== --- /dev/null +++ clang/test/Headers/rvintrin.c @@ -0,0 +1,37 @@ +// RUN: %clang -fsyntax-only -ffreestanding --target=riscv32 -std=c89 -xc %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -std=c99 -xc %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -std=c11 -xc %s + +// RUN: %clang -fsyntax-only -ffreestanding --target=riscv64 -std=c89 -xc %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -std=c99 -xc %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -std=c11 -xc %s + +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -std=c++98 -xc++ %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -std=c++11 -xc++ %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -std=c++14 -xc++ %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -std=c++17 -xc++ %s + +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -std=c++98 -xc++ %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -std=c++11 -xc++ %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -std=c++14 -xc++ %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -std=c++17 -xc++ %s + +// RUN: %clang -fsyntax-only -ffreestanding 
--target=riscv32 -Xclang -target-feature -Xclang +b -std=c89 -xc %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -Xclang -target-feature -Xclang +b -std=c99 -xc %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -Xclang -target-feature -Xclang +b -std=c11 -xc %s + +// RUN: %clang -fsyntax-only -ffreestanding --target=riscv64 -Xclang -target-feature -Xclang +b -std=c89 -xc %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -Xclang -target-feature -Xclang +b -std=c99 -xc %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -Xclang -target-feature -Xclang +b -std=c11 -xc %s + +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -Xclang -target-feature -Xclang +b -std=c++98 -xc++ %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -Xclang -target-feature -Xclang +b -std=c++11 -xc++ %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -Xclang -target-feature -Xclang +b -std=c++14 -xc++ %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv32 -Xclang -target-feature -Xclang +b -std=c++17 -xc++ %s + +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -Xclang -target-feature -Xclang +b -std=c++98 -xc++ %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -Xclang -target-feature -Xclang +b -std=c++11 -xc++ %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -Xclang -target-feature -Xclang +b -std=c++14 -xc++ %s +// RUN: %clang -fsyntax-only -Wall -Werror -ffreestanding --target=riscv64 -Xclang -target-feature -Xclang +b -std=c++17 -xc++ %s + +#include Index: llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn =================================================================== --- llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn +++ llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn @@ -132,6 +132,16 @@ "ptwriteintrin.h", "rdseedintrin.h", "rtmintrin.h", + "rv32bintrin-builtins.h", + "rv32bintrin-emulation.h", + "rv32bintrin.h", + "rv64bintrin-asm.h", + "rv64bintrin-builtins.h", + "rv64bintrin-emulation.h", + "rv64bintrin.h", + "rvbintrin-asm.h", + "rvbintrin-emulation.h", + "rvintrin.h", "s390intrin.h", "sgxintrin.h", "shaintrin.h",