diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -109,6 +109,7 @@
   vecintrin.h
   vpclmulqdqintrin.h
   waitpkgintrin.h
+  wasm_simd128.h
   wbnoinvdintrin.h
   wmmintrin.h
   __wmmintrin_aes.h
diff --git a/clang/lib/Headers/wasm_simd128.h b/clang/lib/Headers/wasm_simd128.h
new file mode 100644
--- /dev/null
+++ b/clang/lib/Headers/wasm_simd128.h
@@ -0,0 +1,1240 @@
+/*===---- wasm_simd128.h - WebAssembly portable SIMD intrinsics ------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#pragma once
+
+#include <stdbool.h>
+#include <stdint.h>
+
+// User-facing type
+typedef int32_t v128_t __attribute__((__vector_size__(16), __aligned__(16)));
+
+// Internal types determined by clang builtin definitions
+typedef int32_t __v128_u __attribute__((__vector_size__(16), __aligned__(1)));
+typedef char __i8x16 __attribute__((__vector_size__(16), __aligned__(16)));
+typedef unsigned char __u8x16
+    __attribute__((__vector_size__(16), __aligned__(16)));
+typedef short __i16x8 __attribute__((__vector_size__(16), __aligned__(16)));
+typedef unsigned short __u16x8
+    __attribute__((__vector_size__(16), __aligned__(16)));
+typedef int __i32x4 __attribute__((__vector_size__(16), __aligned__(16)));
+typedef unsigned int __u32x4
+    __attribute__((__vector_size__(16), __aligned__(16)));
+typedef long long __i64x2 __attribute__((__vector_size__(16), __aligned__(16)));
+typedef unsigned long long __u64x2
+    __attribute__((__vector_size__(16), __aligned__(16)));
+typedef float __f32x4 __attribute__((__vector_size__(16), __aligned__(16)));
+typedef double __f64x2 __attribute__((__vector_size__(16), __aligned__(16)));
+
+#define __DEFAULT_FN_ATTRS                                                    \
+  __attribute__((__always_inline__, __nodebug__, __target__("simd128"),      \
+                 __min_vector_width__(128)))
+
+#define __REQUIRE_CONSTANT(e)                                                 \
+  _Static_assert(__builtin_constant_p(e), "Expected constant")
+
+// v128_t wasm_v128_load(void* mem)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_load(const void *__mem) {
+  // UB-free unaligned access copied from xmmintrin.h
+  struct __wasm_v128_load_struct {
+    __v128_u __v;
+  } __attribute__((__packed__, __may_alias__));
+  return ((const struct __wasm_v128_load_struct *)__mem)->__v;
+}
+
+#ifdef __wasm_unimplemented_simd128__
+
+// v128_t wasm_v8x16_load_splat(void* mem)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_v8x16_load_splat(const void *__mem) {
+  struct __wasm_v8x16_load_splat_struct {
+    char __v;
+  } __attribute__((__packed__, __may_alias__));
+  char v = ((const struct __wasm_v8x16_load_splat_struct *)__mem)->__v;
+  return (v128_t)(__i8x16){v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v};
+}
+
+// v128_t wasm_v16x8_load_splat(void* mem)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_v16x8_load_splat(const void *__mem) {
+  struct __wasm_v16x8_load_splat_struct {
+    short __v;
+  } __attribute__((__packed__, __may_alias__));
+  short v = ((const struct __wasm_v16x8_load_splat_struct *)__mem)->__v;
+  return (v128_t)(__i16x8){v, v, v, v, v, v, v, v};
+}
+
+// v128_t wasm_v32x4_load_splat(void* mem)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_v32x4_load_splat(const void *__mem) {
+  struct __wasm_v32x4_load_splat_struct {
+    int __v;
+  } __attribute__((__packed__, __may_alias__));
+  int v = ((const struct __wasm_v32x4_load_splat_struct *)__mem)->__v;
+  return (v128_t)(__i32x4){v, v, v, v};
+}
+
+// v128_t wasm_v64x2_load_splat(void* mem)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_v64x2_load_splat(const void *__mem) {
+  struct __wasm_v64x2_load_splat_struct {
+    long long __v;
+  } __attribute__((__packed__, __may_alias__));
+  long long v = ((const struct __wasm_v64x2_load_splat_struct *)__mem)->__v;
+  return (v128_t)(__i64x2){v, v};
+}
+
+// v128_t wasm_i16x8_load_8x8(void* mem)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i16x8_load_8x8(const void *__mem) {
+  typedef signed char __i8x8
+      __attribute__((__vector_size__(8), __aligned__(8)));
+  struct __wasm_i16x8_load_8x8_struct {
+    __i8x8 __v;
+  } __attribute__((__packed__, __may_alias__));
+  __i8x8 v = ((const struct __wasm_i16x8_load_8x8_struct *)__mem)->__v;
+  return (v128_t)__builtin_convertvector(v, __i16x8);
+}
+
+// v128_t wasm_u16x8_load_8x8(void* mem)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_u16x8_load_8x8(const void *__mem) {
+  typedef unsigned char __u8x8
+      __attribute__((__vector_size__(8), __aligned__(8)));
+  struct __wasm_u16x8_load_8x8_struct {
+    __u8x8 __v;
+  } __attribute__((__packed__, __may_alias__));
+  __u8x8 v = ((const struct __wasm_u16x8_load_8x8_struct *)__mem)->__v;
+  return (v128_t)__builtin_convertvector(v, __u16x8);
+}
+
+// v128_t wasm_i32x4_load_16x4(void* mem)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i32x4_load_16x4(const void *__mem) {
+  typedef short __i16x4 __attribute__((__vector_size__(8), __aligned__(8)));
+  struct __wasm_i32x4_load_16x4_struct {
+    __i16x4 __v;
+  } __attribute__((__packed__, __may_alias__));
+  __i16x4 v = ((const struct __wasm_i32x4_load_16x4_struct *)__mem)->__v;
+  return (v128_t)__builtin_convertvector(v, __i32x4);
+}
+
+// v128_t wasm_u32x4_load_16x4(void* mem)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_u32x4_load_16x4(const void *__mem) {
+  typedef unsigned short __u16x4
+      __attribute__((__vector_size__(8), __aligned__(8)));
+  struct __wasm_u32x4_load_16x4_struct {
+    __u16x4 __v;
+  } __attribute__((__packed__, __may_alias__));
+  __u16x4 v = ((const struct __wasm_u32x4_load_16x4_struct *)__mem)->__v;
+  return (v128_t)__builtin_convertvector(v, __u32x4);
+}
+
+// v128_t wasm_i64x2_load_32x2(void* mem)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i64x2_load_32x2(const void *__mem) {
+  typedef int __i32x2 __attribute__((__vector_size__(8), __aligned__(8)));
+  struct __wasm_i64x2_load_32x2_struct {
+    __i32x2 __v;
+  } __attribute__((__packed__, __may_alias__));
+  __i32x2 v = ((const struct __wasm_i64x2_load_32x2_struct *)__mem)->__v;
+  return (v128_t)__builtin_convertvector(v, __i64x2);
+}
+
+// v128_t wasm_u64x2_load_32x2(void* mem)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_u64x2_load_32x2(const void *__mem) {
+  typedef unsigned int __u32x2
+      __attribute__((__vector_size__(8), __aligned__(8)));
+  struct __wasm_u64x2_load_32x2_struct {
+    __u32x2 __v;
+  } __attribute__((__packed__, __may_alias__));
+  __u32x2 v = ((const struct __wasm_u64x2_load_32x2_struct *)__mem)->__v;
+  return (v128_t)__builtin_convertvector(v, __u64x2);
+}
+
+#endif // __wasm_unimplemented_simd128__
+
+// wasm_v128_store(void* mem, v128_t a)
+static __inline__ void __DEFAULT_FN_ATTRS wasm_v128_store(void *__mem,
+                                                          v128_t __a) {
+  // UB-free unaligned access copied from xmmintrin.h
+  struct __wasm_v128_store_struct {
+    __v128_u __v;
+  } __attribute__((__packed__, __may_alias__));
+  ((struct __wasm_v128_store_struct *)__mem)->__v = __a;
+}
+
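+// As a usage sketch (assuming a wasm target built with -msimd128; the helper
+// name `add_f32` is illustrative, not part of this header), the unaligned
+// load/store pair composes into a simple strip-mined loop:
+//
+//   #include <wasm_simd128.h>
+//
+//   // Sums two float arrays; n is assumed to be a multiple of 4.
+//   void add_f32(float *dst, const float *a, const float *b, int n) {
+//     for (int i = 0; i < n; i += 4) {
+//       v128_t va = wasm_v128_load(a + i); // alignment-safe by design
+//       v128_t vb = wasm_v128_load(b + i);
+//       wasm_v128_store(dst + i, wasm_f32x4_add(va, vb));
+//     }
+//   }
+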
+// wasm_i8x16_make(...)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_make(
+    int8_t c0, int8_t c1, int8_t c2, int8_t c3, int8_t c4, int8_t c5,
+    int8_t c6, int8_t c7, int8_t c8, int8_t c9, int8_t c10, int8_t c11,
+    int8_t c12, int8_t c13, int8_t c14, int8_t c15) {
+  return (v128_t)(__i8x16){c0, c1, c2, c3, c4, c5, c6, c7,
+                           c8, c9, c10, c11, c12, c13, c14, c15};
+}
+
+// wasm_i16x8_make(...)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i16x8_make(int16_t c0, int16_t c1, int16_t c2, int16_t c3, int16_t c4,
+                int16_t c5, int16_t c6, int16_t c7) {
+  return (v128_t)(__i16x8){c0, c1, c2, c3, c4, c5, c6, c7};
+}
+
+// wasm_i32x4_make(...)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_make(int32_t c0,
+                                                            int32_t c1,
+                                                            int32_t c2,
+                                                            int32_t c3) {
+  return (v128_t)(__i32x4){c0, c1, c2, c3};
+}
+
+// wasm_f32x4_make(...)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_make(float c0, float c1,
+                                                            float c2,
+                                                            float c3) {
+  return (v128_t)(__f32x4){c0, c1, c2, c3};
+}
+
+// wasm_i64x2_make(...)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_make(int64_t c0,
+                                                            int64_t c1) {
+  return (v128_t)(__i64x2){c0, c1};
+}
+
+// wasm_f64x2_make(...)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_make(double c0,
+                                                            double c1) {
+  return (v128_t)(__f64x2){c0, c1};
+}
+
+// v128_t wasm_i8x16_const(...)
+#define wasm_i8x16_const(c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11,   \
+                         c12, c13, c14, c15)                                 \
+  __extension__({                                                            \
+    __REQUIRE_CONSTANT(c0);                                                  \
+    __REQUIRE_CONSTANT(c1);                                                  \
+    __REQUIRE_CONSTANT(c2);                                                  \
+    __REQUIRE_CONSTANT(c3);                                                  \
+    __REQUIRE_CONSTANT(c4);                                                  \
+    __REQUIRE_CONSTANT(c5);                                                  \
+    __REQUIRE_CONSTANT(c6);                                                  \
+    __REQUIRE_CONSTANT(c7);                                                  \
+    __REQUIRE_CONSTANT(c8);                                                  \
+    __REQUIRE_CONSTANT(c9);                                                  \
+    __REQUIRE_CONSTANT(c10);                                                 \
+    __REQUIRE_CONSTANT(c11);                                                 \
+    __REQUIRE_CONSTANT(c12);                                                 \
+    __REQUIRE_CONSTANT(c13);                                                 \
+    __REQUIRE_CONSTANT(c14);                                                 \
+    __REQUIRE_CONSTANT(c15);                                                 \
+    (v128_t)(__i8x16){c0, c1, c2, c3, c4, c5, c6, c7,                        \
+                      c8, c9, c10, c11, c12, c13, c14, c15};                 \
+  })
+
+// v128_t wasm_i16x8_const(...)
+#define wasm_i16x8_const(c0, c1, c2, c3, c4, c5, c6, c7)                     \
+  __extension__({                                                            \
+    __REQUIRE_CONSTANT(c0);                                                  \
+    __REQUIRE_CONSTANT(c1);                                                  \
+    __REQUIRE_CONSTANT(c2);                                                  \
+    __REQUIRE_CONSTANT(c3);                                                  \
+    __REQUIRE_CONSTANT(c4);                                                  \
+    __REQUIRE_CONSTANT(c5);                                                  \
+    __REQUIRE_CONSTANT(c6);                                                  \
+    __REQUIRE_CONSTANT(c7);                                                  \
+    (v128_t)(__i16x8){c0, c1, c2, c3, c4, c5, c6, c7};                       \
+  })
+
+// v128_t wasm_i32x4_const(...)
+#define wasm_i32x4_const(c0, c1, c2, c3)                                     \
+  __extension__({                                                            \
+    __REQUIRE_CONSTANT(c0);                                                  \
+    __REQUIRE_CONSTANT(c1);                                                  \
+    __REQUIRE_CONSTANT(c2);                                                  \
+    __REQUIRE_CONSTANT(c3);                                                  \
+    (v128_t)(__i32x4){c0, c1, c2, c3};                                       \
+  })
+
+// v128_t wasm_f32x4_const(...)
+#define wasm_f32x4_const(c0, c1, c2, c3)                                     \
+  __extension__({                                                            \
+    __REQUIRE_CONSTANT(c0);                                                  \
+    __REQUIRE_CONSTANT(c1);                                                  \
+    __REQUIRE_CONSTANT(c2);                                                  \
+    __REQUIRE_CONSTANT(c3);                                                  \
+    (v128_t)(__f32x4){c0, c1, c2, c3};                                       \
+  })
+
+// v128_t wasm_i64x2_const(...)
+#define wasm_i64x2_const(c0, c1)                                             \
+  __extension__({                                                            \
+    __REQUIRE_CONSTANT(c0);                                                  \
+    __REQUIRE_CONSTANT(c1);                                                  \
+    (v128_t)(__i64x2){c0, c1};                                               \
+  })
+
+// v128_t wasm_f64x2_const(...)
+#define wasm_f64x2_const(c0, c1)                                             \
+  __extension__({                                                            \
+    __REQUIRE_CONSTANT(c0);                                                  \
+    __REQUIRE_CONSTANT(c1);                                                  \
+    (v128_t)(__f64x2){c0, c1};                                               \
+  })
+
+// v128_t wasm_i8x16_splat(int8_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_splat(int8_t a) {
+  return (v128_t)(__i8x16){a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a};
+}
+
+// int8_t wasm_i8x16_extract_lane(v128_t a, imm i)
+#define wasm_i8x16_extract_lane(a, i)                                        \
+  (__builtin_wasm_extract_lane_s_i8x16((__i8x16)(a), i))
+
+// uint8_t wasm_u8x16_extract_lane(v128_t a, imm i)
+#define wasm_u8x16_extract_lane(a, i)                                        \
+  (__builtin_wasm_extract_lane_u_i8x16((__i8x16)(a), i))
+
+// v128_t wasm_i8x16_replace_lane(v128_t a, imm i, int8_t b)
+#define wasm_i8x16_replace_lane(a, i, b)                                     \
+  ((v128_t)__builtin_wasm_replace_lane_i8x16((__i8x16)(a), i, b))
+
+// v128_t wasm_i16x8_splat(int16_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_splat(int16_t a) {
+  return (v128_t)(__i16x8){a, a, a, a, a, a, a, a};
+}
+
+// int16_t wasm_i16x8_extract_lane(v128_t a, imm i)
+#define wasm_i16x8_extract_lane(a, i)                                        \
+  (__builtin_wasm_extract_lane_s_i16x8((__i16x8)(a), i))
+
+// uint16_t wasm_u16x8_extract_lane(v128_t a, imm i)
+#define wasm_u16x8_extract_lane(a, i)                                        \
+  (__builtin_wasm_extract_lane_u_i16x8((__i16x8)(a), i))
+
+// v128_t wasm_i16x8_replace_lane(v128_t a, imm i, int16_t b)
+#define wasm_i16x8_replace_lane(a, i, b)                                     \
+  ((v128_t)__builtin_wasm_replace_lane_i16x8((__i16x8)(a), i, b))
+
+// v128_t wasm_i32x4_splat(int32_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_splat(int32_t a) {
+  return (v128_t)(__i32x4){a, a, a, a};
+}
+
+// int32_t wasm_i32x4_extract_lane(v128_t a, imm i)
+#define wasm_i32x4_extract_lane(a, i)                                        \
+  (__builtin_wasm_extract_lane_i32x4((__i32x4)(a), i))
+
+// v128_t wasm_i32x4_replace_lane(v128_t a, imm i, int32_t b)
+#define wasm_i32x4_replace_lane(a, i, b)                                     \
+  ((v128_t)__builtin_wasm_replace_lane_i32x4((__i32x4)(a), i, b))
+
+// v128_t wasm_i64x2_splat(int64_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_splat(int64_t a) {
+  return (v128_t)(__i64x2){a, a};
+}
+
+// int64_t wasm_i64x2_extract_lane(v128_t a, imm i)
+#define wasm_i64x2_extract_lane(a, i)                                        \
+  (__builtin_wasm_extract_lane_i64x2((__i64x2)(a), i))
+
+// v128_t wasm_i64x2_replace_lane(v128_t a, imm i, int64_t b)
+#define wasm_i64x2_replace_lane(a, i, b)                                     \
+  ((v128_t)__builtin_wasm_replace_lane_i64x2((__i64x2)(a), i, b))
+
+// v128_t wasm_f32x4_splat(float a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_splat(float a) {
+  return (v128_t)(__f32x4){a, a, a, a};
+}
+
+// float wasm_f32x4_extract_lane(v128_t a, imm i)
+#define wasm_f32x4_extract_lane(a, i)                                        \
+  (__builtin_wasm_extract_lane_f32x4((__f32x4)(a), i))
+
+// v128_t wasm_f32x4_replace_lane(v128_t a, imm i, float b)
+#define wasm_f32x4_replace_lane(a, i, b)                                     \
+  ((v128_t)__builtin_wasm_replace_lane_f32x4((__f32x4)(a), i, b))
+
+// v128_t wasm_f64x2_splat(double a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_splat(double a) {
+  return (v128_t)(__f64x2){a, a};
+}
+
+// double wasm_f64x2_extract_lane(v128_t a, imm i)
+#define wasm_f64x2_extract_lane(a, i)                                        \
+  (__builtin_wasm_extract_lane_f64x2((__f64x2)(a), i))
+
+// v128_t wasm_f64x2_replace_lane(v128_t a, imm i, double b)
+#define wasm_f64x2_replace_lane(a, i, b)                                     \
+  ((v128_t)__builtin_wasm_replace_lane_f64x2((__f64x2)(a), i, b))
+
+// v128_t wasm_i8x16_eq(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_eq(v128_t a, v128_t b) {
+  return (v128_t)((__i8x16)a == (__i8x16)b);
+}
+
+// v128_t wasm_i8x16_ne(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_ne(v128_t a, v128_t b) {
+  return (v128_t)((__i8x16)a != (__i8x16)b);
+}
+
+// v128_t wasm_i8x16_lt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_lt(v128_t a, v128_t b) {
+  return (v128_t)((__i8x16)a < (__i8x16)b);
+}
+
+// v128_t wasm_u8x16_lt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_lt(v128_t a, v128_t b) {
+  return (v128_t)((__u8x16)a < (__u8x16)b);
+}
+
+// v128_t wasm_i8x16_gt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_gt(v128_t a, v128_t b) {
+  return (v128_t)((__i8x16)a > (__i8x16)b);
+}
+
+// v128_t wasm_u8x16_gt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_gt(v128_t a, v128_t b) {
+  return (v128_t)((__u8x16)a > (__u8x16)b);
+}
+
+// v128_t wasm_i8x16_le(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_le(v128_t a, v128_t b) {
+  return (v128_t)((__i8x16)a <= (__i8x16)b);
+}
+
+// v128_t wasm_u8x16_le(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_le(v128_t a, v128_t b) {
+  return (v128_t)((__u8x16)a <= (__u8x16)b);
+}
+
+// v128_t wasm_i8x16_ge(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_ge(v128_t a, v128_t b) {
+  return (v128_t)((__i8x16)a >= (__i8x16)b);
+}
+
+// v128_t wasm_u8x16_ge(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_ge(v128_t a, v128_t b) {
+  return (v128_t)((__u8x16)a >= (__u8x16)b);
+}
+
+// v128_t wasm_i16x8_eq(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_eq(v128_t a, v128_t b) {
+  return (v128_t)((__i16x8)a == (__i16x8)b);
+}
+
+// v128_t wasm_i16x8_ne(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_ne(v128_t a, v128_t b) {
+  return (v128_t)((__u16x8)a != (__u16x8)b);
+}
+
+// v128_t wasm_i16x8_lt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_lt(v128_t a, v128_t b) {
+  return (v128_t)((__i16x8)a < (__i16x8)b);
+}
+
+// v128_t wasm_u16x8_lt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_lt(v128_t a, v128_t b) {
+  return (v128_t)((__u16x8)a < (__u16x8)b);
+}
+
+// v128_t wasm_i16x8_gt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_gt(v128_t a, v128_t b) {
+  return (v128_t)((__i16x8)a > (__i16x8)b);
+}
+
+// v128_t wasm_u16x8_gt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_gt(v128_t a, v128_t b) {
+  return (v128_t)((__u16x8)a > (__u16x8)b);
+}
+
+// v128_t wasm_i16x8_le(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_le(v128_t a, v128_t b) {
+  return (v128_t)((__i16x8)a <= (__i16x8)b);
+}
+
+// v128_t wasm_u16x8_le(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_le(v128_t a, v128_t b) {
+  return (v128_t)((__u16x8)a <= (__u16x8)b);
+}
+
+// v128_t wasm_i16x8_ge(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_ge(v128_t a, v128_t b) {
+  return (v128_t)((__i16x8)a >= (__i16x8)b);
+}
+
+// v128_t wasm_u16x8_ge(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_ge(v128_t a, v128_t b) {
+  return (v128_t)((__u16x8)a >= (__u16x8)b);
+}
+
+// v128_t wasm_i32x4_eq(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_eq(v128_t a, v128_t b) {
+  return (v128_t)((__i32x4)a == (__i32x4)b);
+}
+
+// v128_t wasm_i32x4_ne(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_ne(v128_t a, v128_t b) {
+  return (v128_t)((__i32x4)a != (__i32x4)b);
+}
+
+// v128_t wasm_i32x4_lt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_lt(v128_t a, v128_t b) {
+  return (v128_t)((__i32x4)a < (__i32x4)b);
+}
+
+// v128_t wasm_u32x4_lt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_lt(v128_t a, v128_t b) {
+  return (v128_t)((__u32x4)a < (__u32x4)b);
+}
+
+// v128_t wasm_i32x4_gt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_gt(v128_t a, v128_t b) {
+  return (v128_t)((__i32x4)a > (__i32x4)b);
+}
+
+// v128_t wasm_u32x4_gt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_gt(v128_t a, v128_t b) {
+  return (v128_t)((__u32x4)a > (__u32x4)b);
+}
+
+// v128_t wasm_i32x4_le(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_le(v128_t a, v128_t b) {
+  return (v128_t)((__i32x4)a <= (__i32x4)b);
+}
+
+// v128_t wasm_u32x4_le(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_le(v128_t a, v128_t b) {
+  return (v128_t)((__u32x4)a <= (__u32x4)b);
+}
+
+// v128_t wasm_i32x4_ge(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_ge(v128_t a, v128_t b) {
+  return (v128_t)((__i32x4)a >= (__i32x4)b);
+}
+
+// v128_t wasm_u32x4_ge(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_ge(v128_t a, v128_t b) {
+  return (v128_t)((__u32x4)a >= (__u32x4)b);
+}
+
+// v128_t wasm_f32x4_eq(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_eq(v128_t a, v128_t b) {
+  return (v128_t)((__f32x4)a == (__f32x4)b);
+}
+
+// v128_t wasm_f32x4_ne(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_ne(v128_t a, v128_t b) {
+  return (v128_t)((__f32x4)a != (__f32x4)b);
+}
+
+// v128_t wasm_f32x4_lt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_lt(v128_t a, v128_t b) {
+  return (v128_t)((__f32x4)a < (__f32x4)b);
+}
+
+// v128_t wasm_f32x4_gt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_gt(v128_t a, v128_t b) {
+  return (v128_t)((__f32x4)a > (__f32x4)b);
+}
+
+// v128_t wasm_f32x4_le(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_le(v128_t a, v128_t b) {
+  return (v128_t)((__f32x4)a <= (__f32x4)b);
+}
+
+// v128_t wasm_f32x4_ge(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_ge(v128_t a, v128_t b) {
+  return (v128_t)((__f32x4)a >= (__f32x4)b);
+}
+
+// v128_t wasm_f64x2_eq(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_eq(v128_t a, v128_t b) {
+  return (v128_t)((__f64x2)a == (__f64x2)b);
+}
+
+// v128_t wasm_f64x2_ne(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_ne(v128_t a, v128_t b) {
+  return (v128_t)((__f64x2)a != (__f64x2)b);
+}
+
+// v128_t wasm_f64x2_lt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_lt(v128_t a, v128_t b) {
+  return (v128_t)((__f64x2)a < (__f64x2)b);
+}
+
+// v128_t wasm_f64x2_gt(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_gt(v128_t a, v128_t b) {
+  return (v128_t)((__f64x2)a > (__f64x2)b);
+}
+
+// v128_t wasm_f64x2_le(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_le(v128_t a, v128_t b) {
+  return (v128_t)((__f64x2)a <= (__f64x2)b);
+}
+
+// v128_t wasm_f64x2_ge(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_ge(v128_t a, v128_t b) {
+  return (v128_t)((__f64x2)a >= (__f64x2)b);
+}
+
+// v128_t wasm_v128_not(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_not(v128_t a) {
+  return ~a;
+}
+
+// v128_t wasm_v128_and(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_and(v128_t a, v128_t b) {
+  return a & b;
+}
+
+// v128_t wasm_v128_or(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_or(v128_t a, v128_t b) {
+  return a | b;
+}
+
+// v128_t wasm_v128_xor(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_xor(v128_t a, v128_t b) {
+  return a ^ b;
+}
+
+#ifdef __wasm_unimplemented_simd128__
+
+// v128_t wasm_v128_andnot(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_andnot(v128_t a,
+                                                             v128_t b) {
+  return a & ~b;
+}
+
+#endif // __wasm_unimplemented_simd128__
+
+// v128_t wasm_v128_bitselect(v128_t a, v128_t b, v128_t mask)
+// `a` is selected for each lane for which `mask` is nonzero.
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v128_bitselect(v128_t a,
+                                                                v128_t b,
+                                                                v128_t mask) {
+  return (v128_t)__builtin_wasm_bitselect((__i32x4)a, (__i32x4)b,
+                                          (__i32x4)mask);
+}
+
+// v128_t wasm_i8x16_abs(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_abs(v128_t a) {
+  return (v128_t)__builtin_wasm_abs_i8x16((__i8x16)a);
+}
+
+// v128_t wasm_i8x16_neg(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_neg(v128_t a) {
+  return (v128_t)(-(__u8x16)a);
+}
+
+// bool wasm_i8x16_any_true(v128_t a)
+static __inline__ bool __DEFAULT_FN_ATTRS wasm_i8x16_any_true(v128_t a) {
+  return __builtin_wasm_any_true_i8x16((__i8x16)a);
+}
+
+// bool wasm_i8x16_all_true(v128_t a)
+static __inline__ bool __DEFAULT_FN_ATTRS wasm_i8x16_all_true(v128_t a) {
+  return __builtin_wasm_all_true_i8x16((__i8x16)a);
+}
+
+// v128_t wasm_i8x16_shl(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shl(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__i8x16)a << b);
+}
+
+// v128_t wasm_i8x16_shr(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shr(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__i8x16)a >> b);
+}
+
+// v128_t wasm_u8x16_shr(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_shr(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__u8x16)a >> b);
+}
+
+// v128_t wasm_i8x16_add(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_add(v128_t a,
+                                                           v128_t b) {
+  return (v128_t)((__u8x16)a + (__u8x16)b);
+}
+
+// v128_t wasm_i8x16_add_saturate(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_add_saturate(v128_t a,
+                                                                    v128_t b) {
+  return (v128_t)__builtin_wasm_add_saturate_s_i8x16((__i8x16)a, (__i8x16)b);
+}
+
+// v128_t wasm_u8x16_add_saturate(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_add_saturate(v128_t a,
+                                                                    v128_t b) {
+  return (v128_t)__builtin_wasm_add_saturate_u_i8x16((__i8x16)a, (__i8x16)b);
+}
+
+// v128_t wasm_i8x16_sub(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_sub(v128_t a,
+                                                           v128_t b) {
+  return (v128_t)((__u8x16)a - (__u8x16)b);
+}
+
+// v128_t wasm_i8x16_sub_saturate(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_sub_saturate(v128_t a,
+                                                                    v128_t b) {
+  return (v128_t)__builtin_wasm_sub_saturate_s_i8x16((__i8x16)a, (__i8x16)b);
+}
+
+// v128_t wasm_u8x16_sub_saturate(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_u8x16_sub_saturate(v128_t a, v128_t b) {
+  return (v128_t)__builtin_wasm_sub_saturate_u_i8x16((__i8x16)a, (__i8x16)b);
+}
+
+// v128_t wasm_i8x16_mul(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_mul(v128_t a,
+                                                           v128_t b) {
+  return (v128_t)((__u8x16)a * (__u8x16)b);
+}
+
+// v128_t wasm_i8x16_min_s(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_min_s(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_min_s_i8x16((__i8x16)a, (__i8x16)b);
+}
+
+// v128_t wasm_i8x16_min_u(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_min_u(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_min_u_i8x16((__i8x16)a, (__i8x16)b);
+}
+
+// v128_t wasm_i8x16_max_s(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_max_s(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_max_s_i8x16((__i8x16)a, (__i8x16)b);
+}
+
+// v128_t wasm_i8x16_max_u(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_max_u(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_max_u_i8x16((__i8x16)a, (__i8x16)b);
+}
+
+// v128_t wasm_i8x16_avgr_u(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_avgr_u(v128_t a,
+                                                              v128_t b) {
+  return (v128_t)__builtin_wasm_avgr_u_i8x16((__i8x16)a, (__i8x16)b);
+}
+
+// v128_t wasm_i16x8_abs(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_abs(v128_t a) {
+  return (v128_t)__builtin_wasm_abs_i16x8((__i16x8)a);
+}
+
+// v128_t wasm_i16x8_neg(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_neg(v128_t a) {
+  return (v128_t)(-(__u16x8)a);
+}
+
+// bool wasm_i16x8_any_true(v128_t a)
+static __inline__ bool __DEFAULT_FN_ATTRS wasm_i16x8_any_true(v128_t a) {
+  return __builtin_wasm_any_true_i16x8((__i16x8)a);
+}
+
+// bool wasm_i16x8_all_true(v128_t a)
+static __inline__ bool __DEFAULT_FN_ATTRS wasm_i16x8_all_true(v128_t a) {
+  return __builtin_wasm_all_true_i16x8((__i16x8)a);
+}
+
+// v128_t wasm_i16x8_shl(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_shl(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__i16x8)a << b);
+}
+
+// v128_t wasm_i16x8_shr(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_shr(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__i16x8)a >> b);
+}
+
+// v128_t wasm_u16x8_shr(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_shr(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__u16x8)a >> b);
+}
+
+// v128_t wasm_i16x8_add(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_add(v128_t a,
+                                                           v128_t b) {
+  return (v128_t)((__u16x8)a + (__u16x8)b);
+}
+
+// v128_t wasm_i16x8_add_saturate(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_add_saturate(v128_t a,
+                                                                    v128_t b) {
+  return (v128_t)__builtin_wasm_add_saturate_s_i16x8((__i16x8)a, (__i16x8)b);
+}
+
+// v128_t wasm_u16x8_add_saturate(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_add_saturate(v128_t a,
+                                                                    v128_t b) {
+  return (v128_t)__builtin_wasm_add_saturate_u_i16x8((__i16x8)a, (__i16x8)b);
+}
+
+// v128_t wasm_i16x8_sub(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_sub(v128_t a,
+                                                           v128_t b) {
+  return (v128_t)((__u16x8)a - (__u16x8)b);
+}
+
+// v128_t wasm_i16x8_sub_saturate(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_sub_saturate(v128_t a,
+                                                                    v128_t b) {
+  return (v128_t)__builtin_wasm_sub_saturate_s_i16x8((__i16x8)a, (__i16x8)b);
+}
+
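+// A minimal sketch of where the saturating forms matter (the helper name
+// `brighten_u8` is illustrative, not part of this header): per-lane u8
+// addition that clamps at 0xff instead of wrapping:
+//
+//   v128_t brighten_u8(v128_t pixels) {
+//     // 0xf0 + 0x10 saturates to 0xff rather than wrapping to 0x00.
+//     return wasm_u8x16_add_saturate(pixels, wasm_i8x16_splat(16));
+//   }
+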
+// v128_t wasm_u16x8_sub_saturate(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_sub_saturate(v128_t a,
+                                                                    v128_t b) {
+  return (v128_t)__builtin_wasm_sub_saturate_u_i16x8((__i16x8)a, (__i16x8)b);
+}
+
+// v128_t wasm_i16x8_mul(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_mul(v128_t a,
+                                                           v128_t b) {
+  return (v128_t)((__u16x8)a * (__u16x8)b);
+}
+
+// v128_t wasm_i16x8_min_s(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_min_s(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_min_s_i16x8((__i16x8)a, (__i16x8)b);
+}
+
+// v128_t wasm_i16x8_min_u(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_min_u(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_min_u_i16x8((__i16x8)a, (__i16x8)b);
+}
+
+// v128_t wasm_i16x8_max_s(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_max_s(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_max_s_i16x8((__i16x8)a, (__i16x8)b);
+}
+
+// v128_t wasm_i16x8_max_u(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_max_u(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_max_u_i16x8((__i16x8)a, (__i16x8)b);
+}
+
+// v128_t wasm_i16x8_avgr_u(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_avgr_u(v128_t a,
+                                                              v128_t b) {
+  return (v128_t)__builtin_wasm_avgr_u_i16x8((__i16x8)a, (__i16x8)b);
+}
+
+// v128_t wasm_i32x4_abs(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_abs(v128_t a) {
+  return (v128_t)__builtin_wasm_abs_i32x4((__i32x4)a);
+}
+
+// v128_t wasm_i32x4_neg(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_neg(v128_t a) {
+  return (v128_t)(-(__u32x4)a);
+}
+
+// bool wasm_i32x4_any_true(v128_t a)
+static __inline__ bool __DEFAULT_FN_ATTRS wasm_i32x4_any_true(v128_t a) {
+  return __builtin_wasm_any_true_i32x4((__i32x4)a);
+}
+
+// bool wasm_i32x4_all_true(v128_t a)
+static __inline__ bool __DEFAULT_FN_ATTRS wasm_i32x4_all_true(v128_t a) {
+  return __builtin_wasm_all_true_i32x4((__i32x4)a);
+}
+
+// v128_t wasm_i32x4_shl(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_shl(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__i32x4)a << b);
+}
+
+// v128_t wasm_i32x4_shr(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_shr(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__i32x4)a >> b);
+}
+
+// v128_t wasm_u32x4_shr(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_shr(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__u32x4)a >> b);
+}
+
+// v128_t wasm_i32x4_add(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_add(v128_t a,
+                                                           v128_t b) {
+  return (v128_t)((__u32x4)a + (__u32x4)b);
+}
+
+// v128_t wasm_i32x4_sub(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_sub(v128_t a,
+                                                           v128_t b) {
+  return (v128_t)((__u32x4)a - (__u32x4)b);
+}
+
+// v128_t wasm_i32x4_mul(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_mul(v128_t a,
+                                                           v128_t b) {
+  return (v128_t)((__u32x4)a * (__u32x4)b);
+}
+
+// v128_t wasm_i32x4_min_s(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_min_s(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_min_s_i32x4((__i32x4)a, (__i32x4)b);
+}
+
+// v128_t wasm_i32x4_min_u(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_min_u(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_min_u_i32x4((__i32x4)a, (__i32x4)b);
+}
+
+// v128_t wasm_i32x4_max_s(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_max_s(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_max_s_i32x4((__i32x4)a, (__i32x4)b);
+}
+
+// v128_t wasm_i32x4_max_u(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_max_u(v128_t a,
+                                                             v128_t b) {
+  return (v128_t)__builtin_wasm_max_u_i32x4((__i32x4)a, (__i32x4)b);
+}
+
+// v128_t wasm_i64x2_neg(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_neg(v128_t a) {
+  return (v128_t)(-(__u64x2)a);
+}
+
+#ifdef __wasm_unimplemented_simd128__
+
+// bool wasm_i64x2_any_true(v128_t a)
+static __inline__ bool __DEFAULT_FN_ATTRS wasm_i64x2_any_true(v128_t a) {
+  return __builtin_wasm_any_true_i64x2((__i64x2)a);
+}
+
+// bool wasm_i64x2_all_true(v128_t a)
+static __inline__ bool __DEFAULT_FN_ATTRS wasm_i64x2_all_true(v128_t a) {
+  return __builtin_wasm_all_true_i64x2((__i64x2)a);
+}
+
+#endif // __wasm_unimplemented_simd128__
+
+// v128_t wasm_i64x2_shl(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_shl(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__i64x2)a << (int64_t)b);
+}
+
+// v128_t wasm_i64x2_shr(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_shr(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__i64x2)a >> (int64_t)b);
+}
+
+// v128_t wasm_u64x2_shr(v128_t a, int32_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u64x2_shr(v128_t a,
+                                                           int32_t b) {
+  return (v128_t)((__u64x2)a >> (int64_t)b);
+}
+
+// v128_t wasm_i64x2_add(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_add(v128_t a,
+                                                           v128_t b) {
+  return (v128_t)((__u64x2)a + (__u64x2)b);
+}
+
+// v128_t wasm_i64x2_sub(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_sub(v128_t a,
+                                                           v128_t b) {
+  return (v128_t)((__u64x2)a - (__u64x2)b);
+}
+
+// v128_t wasm_f32x4_abs(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_abs(v128_t a) {
+  return (v128_t)__builtin_wasm_abs_f32x4((__f32x4)a);
+}
+
+// v128_t wasm_f32x4_neg(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_neg(v128_t a) {
+  return (v128_t)(-(__f32x4)a);
+}
+
+// v128_t wasm_f32x4_sqrt(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_sqrt(v128_t a) {
+  return (v128_t)__builtin_wasm_sqrt_f32x4((__f32x4)a);
+}
+
+#ifdef __wasm_unimplemented_simd128__
+
+// v128_t wasm_f32x4_qfma(v128_t a, v128_t b, v128_t c)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_qfma(v128_t a, v128_t b,
+                                                            v128_t c) {
+  return (v128_t)__builtin_wasm_qfma_f32x4((__f32x4)a, (__f32x4)b, (__f32x4)c);
+}
+
+// v128_t wasm_f32x4_qfms(v128_t a, v128_t b, v128_t c)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_qfms(v128_t a, v128_t b,
+                                                            v128_t c) {
+  return (v128_t)__builtin_wasm_qfms_f32x4((__f32x4)a, (__f32x4)b, (__f32x4)c);
+}
+
+#endif // __wasm_unimplemented_simd128__
+
+// v128_t wasm_f32x4_add(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_add(v128_t a,
+                                                           v128_t b) {
+  return (v128_t)((__f32x4)a + (__f32x4)b);
+}
+
+// v128_t wasm_f32x4_sub(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_sub(v128_t a,
+                                                           v128_t b) {
+  return (v128_t)((__f32x4)a - (__f32x4)b);
+}
+
+// v128_t wasm_f32x4_mul(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_mul(v128_t a,
+                                                           v128_t b) {
+  return (v128_t)((__f32x4)a * (__f32x4)b);
+}
+
+// v128_t wasm_f32x4_div(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_div(v128_t a,
+                                                           v128_t b) {
+  return (v128_t)((__f32x4)a / (__f32x4)b);
+}
+
+// v128_t wasm_f32x4_min(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_min(v128_t a,
+                                                           v128_t b) {
+  return (v128_t)__builtin_wasm_min_f32x4((__f32x4)a, (__f32x4)b);
+}
+
+// v128_t wasm_f32x4_max(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f32x4_max(v128_t a,
+                                                           v128_t b) {
+  return (v128_t)__builtin_wasm_max_f32x4((__f32x4)a, (__f32x4)b);
+}
+
+// v128_t wasm_f64x2_abs(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_abs(v128_t a) {
+  return (v128_t)__builtin_wasm_abs_f64x2((__f64x2)a);
+}
+
+// v128_t wasm_f64x2_neg(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_neg(v128_t a) {
+  return (v128_t)(-(__f64x2)a);
+}
+
+// v128_t wasm_f64x2_sqrt(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_sqrt(v128_t a) {
+  return (v128_t)__builtin_wasm_sqrt_f64x2((__f64x2)a);
+}
+
+#ifdef __wasm_unimplemented_simd128__
+
+// v128_t wasm_f64x2_qfma(v128_t a, v128_t b, v128_t c)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_qfma(v128_t a, v128_t b,
+                                                            v128_t c) {
+  return (v128_t)__builtin_wasm_qfma_f64x2((__f64x2)a, (__f64x2)b, (__f64x2)c);
+}
+
+// v128_t wasm_f64x2_qfms(v128_t a, v128_t b, v128_t c)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_qfms(v128_t a, v128_t b,
+                                                            v128_t c) {
+  return (v128_t)__builtin_wasm_qfms_f64x2((__f64x2)a, (__f64x2)b, (__f64x2)c);
+}
+
+#endif // __wasm_unimplemented_simd128__
+
+// v128_t wasm_f64x2_add(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_add(v128_t a,
+                                                           v128_t b) {
+  return (v128_t)((__f64x2)a + (__f64x2)b);
+}
+
+// v128_t wasm_f64x2_sub(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_sub(v128_t a,
+                                                           v128_t b) {
+  return (v128_t)((__f64x2)a - (__f64x2)b);
+}
+
+// v128_t wasm_f64x2_mul(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_mul(v128_t a,
+                                                           v128_t b) {
+  return (v128_t)((__f64x2)a * (__f64x2)b);
+}
+
+// v128_t wasm_f64x2_div(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_div(v128_t a,
+                                                           v128_t b) {
+  return (v128_t)((__f64x2)a / (__f64x2)b);
+}
+
+// v128_t wasm_f64x2_min(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_min(v128_t a,
+                                                           v128_t b) {
+  return (v128_t)__builtin_wasm_min_f64x2((__f64x2)a, (__f64x2)b);
+}
+
+// v128_t wasm_f64x2_max(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_max(v128_t a,
+                                                           v128_t b) {
+  return (v128_t)__builtin_wasm_max_f64x2((__f64x2)a, (__f64x2)b);
+}
+
+// v128_t wasm_i32x4_trunc_saturate_f32x4(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i32x4_trunc_saturate_f32x4(v128_t a) {
+  return (v128_t)__builtin_wasm_trunc_saturate_s_i32x4_f32x4((__f32x4)a);
+}
+
+// v128_t wasm_u32x4_trunc_saturate_f32x4(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_u32x4_trunc_saturate_f32x4(v128_t a) {
+  return (v128_t)__builtin_wasm_trunc_saturate_u_i32x4_f32x4((__f32x4)a);
+}
+
+#ifdef __wasm_unimplemented_simd128__
+
+// v128_t wasm_i64x2_trunc_saturate_f64x2(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i64x2_trunc_saturate_f64x2(v128_t a) {
+  return (v128_t)__builtin_wasm_trunc_saturate_s_i64x2_f64x2((__f64x2)a);
+}
+
+// v128_t wasm_u64x2_trunc_saturate_f64x2(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_u64x2_trunc_saturate_f64x2(v128_t a) {
+  return (v128_t)__builtin_wasm_trunc_saturate_u_i64x2_f64x2((__f64x2)a);
+}
+
+#endif // __wasm_unimplemented_simd128__
+
+// v128_t wasm_f32x4_convert_i32x4(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_f32x4_convert_i32x4(v128_t a) {
+  return (v128_t)__builtin_convertvector((__i32x4)a, __f32x4);
+}
+
+// v128_t wasm_f32x4_convert_u32x4(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_f32x4_convert_u32x4(v128_t a) {
+  return (v128_t)__builtin_convertvector((__u32x4)a, __f32x4);
+}
+
+#ifdef __wasm_unimplemented_simd128__
+
+// v128_t wasm_f64x2_convert_i64x2(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_f64x2_convert_i64x2(v128_t a) {
+  return (v128_t)__builtin_convertvector((__i64x2)a, __f64x2);
+}
+
+// v128_t wasm_f64x2_convert_u64x2(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_f64x2_convert_u64x2(v128_t a) {
+  return (v128_t)__builtin_convertvector((__u64x2)a, __f64x2);
+}
+
+#endif // __wasm_unimplemented_simd128__
+
+// v128_t wasm_v8x16_shuffle(v128_t a, v128_t b, c0, ..., c15)
+#define wasm_v8x16_shuffle(a, b, c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, \
+                           c11, c12, c13, c14, c15)                           \
+  ((v128_t)(__builtin_shufflevector((__u8x16)(a), (__u8x16)(b), c0, c1, c2,   \
+                                    c3, c4, c5, c6, c7, c8, c9, c10, c11,     \
+                                    c12, c13, c14, c15)))
+
+// v128_t wasm_v16x8_shuffle(v128_t a, v128_t b, c0, ..., c7)
+#define wasm_v16x8_shuffle(a, b, c0, c1, c2, c3, c4, c5, c6, c7)              \
+  ((v128_t)(__builtin_shufflevector((__u16x8)(a), (__u16x8)(b), c0, c1, c2,   \
+                                    c3, c4, c5, c6, c7)))
+
+// v128_t wasm_v32x4_shuffle(v128_t a, v128_t b, c0, ..., c3)
+#define wasm_v32x4_shuffle(a, b, c0, c1, c2, c3)                              \
+  ((v128_t)(                                                                  \
+      __builtin_shufflevector((__u32x4)(a), (__u32x4)(b), c0, c1, c2, c3)))
+
+// v128_t wasm_v64x2_shuffle(v128_t a, v128_t b, c0, c1)
+#define wasm_v64x2_shuffle(a, b, c0, c1)                                      \
+  ((v128_t)(__builtin_shufflevector((__u64x2)(a), (__u64x2)(b), c0, c1)))
+
+#ifdef __wasm_unimplemented_simd128__
+
+// v128_t wasm_v8x16_swizzle(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_v8x16_swizzle(v128_t a,
+                                                               v128_t b) {
+  return (v128_t)__builtin_wasm_swizzle_v8x16((__i8x16)a, (__i8x16)b);
+}
+
+#endif // __wasm_unimplemented_simd128__
+
+// v128_t wasm_i8x16_narrow_i16x8(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_narrow_i16x8(v128_t a,
+                                                                    v128_t b) {
+  return (v128_t)__builtin_wasm_narrow_s_i8x16_i16x8((__i16x8)a, (__i16x8)b);
+}
+
+// v128_t wasm_u8x16_narrow_i16x8(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_narrow_i16x8(v128_t a,
+                                                                    v128_t b) {
+  return (v128_t)__builtin_wasm_narrow_u_i8x16_i16x8((__i16x8)a, (__i16x8)b);
+}
+
+// v128_t wasm_i16x8_narrow_i32x4(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_narrow_i32x4(v128_t a,
+                                                                    v128_t b) {
+  return (v128_t)__builtin_wasm_narrow_s_i16x8_i32x4((__i32x4)a, (__i32x4)b);
+}
+
+// v128_t wasm_u16x8_narrow_i32x4(v128_t a, v128_t b)
+static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_narrow_i32x4(v128_t a,
+                                                                    v128_t b) {
+  return (v128_t)__builtin_wasm_narrow_u_i16x8_i32x4((__i32x4)a, (__i32x4)b);
+}
+
+// v128_t wasm_i16x8_widen_low_i8x16(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i16x8_widen_low_i8x16(v128_t a) {
+  return (v128_t)__builtin_wasm_widen_low_s_i16x8_i8x16((__i8x16)a);
+}
+
+// v128_t wasm_i16x8_widen_high_i8x16(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i16x8_widen_high_i8x16(v128_t a) {
+  return (v128_t)__builtin_wasm_widen_high_s_i16x8_i8x16((__i8x16)a);
+}
+
+// v128_t wasm_i16x8_widen_low_u8x16(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i16x8_widen_low_u8x16(v128_t a) {
+  return (v128_t)__builtin_wasm_widen_low_u_i16x8_i8x16((__i8x16)a);
+}
+
+// v128_t wasm_i16x8_widen_high_u8x16(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i16x8_widen_high_u8x16(v128_t a) {
+  return (v128_t)__builtin_wasm_widen_high_u_i16x8_i8x16((__i8x16)a);
+}
+
+// v128_t wasm_i32x4_widen_low_i16x8(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i32x4_widen_low_i16x8(v128_t a) {
+  return (v128_t)__builtin_wasm_widen_low_s_i32x4_i16x8((__i16x8)a);
+}
+
+// v128_t wasm_i32x4_widen_high_i16x8(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i32x4_widen_high_i16x8(v128_t a) {
+  return (v128_t)__builtin_wasm_widen_high_s_i32x4_i16x8((__i16x8)a);
+}
+
+// v128_t wasm_i32x4_widen_low_u16x8(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i32x4_widen_low_u16x8(v128_t a) {
+  return (v128_t)__builtin_wasm_widen_low_u_i32x4_i16x8((__i16x8)a);
+}
+
+// v128_t wasm_i32x4_widen_high_u16x8(v128_t a)
+static __inline__ v128_t __DEFAULT_FN_ATTRS
+wasm_i32x4_widen_high_u16x8(v128_t a) {
+  return (v128_t)__builtin_wasm_widen_high_u_i32x4_i16x8((__i16x8)a);
+}
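+
+// A sketch of pairing the widen and narrow conversions (the helper name
+// `scale_3_2_u8` is illustrative, not part of this header): widen u8 lanes to
+// i16 for headroom, scale by 3/2, then narrow back with unsigned saturation:
+//
+//   v128_t scale_3_2_u8(v128_t a) {
+//     v128_t lo = wasm_i16x8_widen_low_u8x16(a);
+//     v128_t hi = wasm_i16x8_widen_high_u8x16(a);
+//     v128_t three = wasm_i16x8_splat(3);
+//     lo = wasm_i16x8_shr(wasm_i16x8_mul(lo, three), 1);
+//     hi = wasm_i16x8_shr(wasm_i16x8_mul(hi, three), 1);
+//     return wasm_u8x16_narrow_i16x8(lo, hi); // lanes above 255 clamp to 255
+//   }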