diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def --- a/clang/include/clang/Basic/BuiltinsWebAssembly.def +++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def @@ -118,6 +118,21 @@ TARGET_BUILTIN(__builtin_wasm_q15mulr_saturate_s_i8x16, "V8sV8sV8s", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_extmul_low_i8x16_s_i16x8, "V8sV16ScV16Sc", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_extmul_high_i8x16_s_i16x8, "V8sV16ScV16Sc", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_extmul_low_i8x16_u_i16x8, "V8UsV16UcV16Uc", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_extmul_high_i8x16_u_i16x8, "V8UsV16UcV16Uc", "nc", "simd128") + +TARGET_BUILTIN(__builtin_wasm_extmul_low_i16x8_s_i32x4, "V4iV8sV8s", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_extmul_high_i16x8_s_i32x4, "V4iV8sV8s", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_extmul_low_i16x8_u_i32x4, "V4UiV8UsV8Us", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_extmul_high_i16x8_u_i32x4, "V4UiV8UsV8Us", "nc", "simd128") + +TARGET_BUILTIN(__builtin_wasm_extmul_low_i32x4_s_i64x2, "V2LLiV4iV4i", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_extmul_high_i32x4_s_i64x2, "V2LLiV4iV4i", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_extmul_low_i32x4_u_i64x2, "V2ULLiV4UiV4Ui", "nc", "simd128") +TARGET_BUILTIN(__builtin_wasm_extmul_high_i32x4_u_i64x2, "V2ULLiV4UiV4Ui", "nc", "simd128") + TARGET_BUILTIN(__builtin_wasm_bitselect, "V4iV4iV4iV4i", "nc", "simd128") TARGET_BUILTIN(__builtin_wasm_shuffle_v8x16, "V16ScV16ScV16ScIiIiIiIiIiIiIiIiIiIiIiIiIiIiIiIi", "nc", "simd128") diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -16600,6 +16600,49 @@ CGM.getIntrinsic(Intrinsic::wasm_q15mulr_saturate_signed); return Builder.CreateCall(Callee, {LHS, RHS}); } + case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_s_i16x8: + case 
WebAssembly::BI__builtin_wasm_extmul_high_i8x16_s_i16x8: + case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_u_i16x8: + case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_u_i16x8: + case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_s_i32x4: + case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_s_i32x4: + case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_u_i32x4: + case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_u_i32x4: + case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_s_i64x2: + case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_s_i64x2: + case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_u_i64x2: + case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_u_i64x2: { + Value *LHS = EmitScalarExpr(E->getArg(0)); + Value *RHS = EmitScalarExpr(E->getArg(1)); + unsigned IntNo; + switch (BuiltinID) { + case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_s_i16x8: + case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_s_i32x4: + case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_s_i64x2: + IntNo = Intrinsic::wasm_extmul_low_signed; + break; + case WebAssembly::BI__builtin_wasm_extmul_low_i8x16_u_i16x8: + case WebAssembly::BI__builtin_wasm_extmul_low_i16x8_u_i32x4: + case WebAssembly::BI__builtin_wasm_extmul_low_i32x4_u_i64x2: + IntNo = Intrinsic::wasm_extmul_low_unsigned; + break; + case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_s_i16x8: + case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_s_i32x4: + case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_s_i64x2: + IntNo = Intrinsic::wasm_extmul_high_signed; + break; + case WebAssembly::BI__builtin_wasm_extmul_high_i8x16_u_i16x8: + case WebAssembly::BI__builtin_wasm_extmul_high_i16x8_u_i32x4: + case WebAssembly::BI__builtin_wasm_extmul_high_i32x4_u_i64x2: + IntNo = Intrinsic::wasm_extmul_high_unsigned; + break; + default: + llvm_unreachable("unexpected builtin ID"); + } + + Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType())); + return Builder.CreateCall(Callee, {LHS, RHS}); + }
case WebAssembly::BI__builtin_wasm_bitselect: { Value *V1 = EmitScalarExpr(E->getArg(0)); Value *V2 = EmitScalarExpr(E->getArg(1)); diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c --- a/clang/test/CodeGen/builtins-wasm.c +++ b/clang/test/CodeGen/builtins-wasm.c @@ -525,6 +525,90 @@ // WEBASSEMBLY-NEXT: ret } +i16x8 extmul_low_i8x16_s_i16x8(i8x16 x, i8x16 y) { + return __builtin_wasm_extmul_low_i8x16_s_i16x8(x, y); + // WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.low.signed.v8i16( + // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y) + // WEBASSEMBLY-NEXT: ret +} + +i16x8 extmul_high_i8x16_s_i16x8(i8x16 x, i8x16 y) { + return __builtin_wasm_extmul_high_i8x16_s_i16x8(x, y); + // WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.high.signed.v8i16( + // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y) + // WEBASSEMBLY-NEXT: ret +} + +u16x8 extmul_low_i8x16_u_i16x8(u8x16 x, u8x16 y) { + return __builtin_wasm_extmul_low_i8x16_u_i16x8(x, y); + // WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.low.unsigned.v8i16( + // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y) + // WEBASSEMBLY-NEXT: ret +} + +u16x8 extmul_high_i8x16_u_i16x8(u8x16 x, u8x16 y) { + return __builtin_wasm_extmul_high_i8x16_u_i16x8(x, y); + // WEBASSEMBLY: call <8 x i16> @llvm.wasm.extmul.high.unsigned.v8i16( + // WEBASSEMBLY-SAME: <16 x i8> %x, <16 x i8> %y) + // WEBASSEMBLY-NEXT: ret +} + +i32x4 extmul_low_i16x8_s_i32x4(i16x8 x, i16x8 y) { + return __builtin_wasm_extmul_low_i16x8_s_i32x4(x, y); + // WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.low.signed.v4i32( + // WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y) + // WEBASSEMBLY-NEXT: ret +} + +i32x4 extmul_high_i16x8_s_i32x4(i16x8 x, i16x8 y) { + return __builtin_wasm_extmul_high_i16x8_s_i32x4(x, y); + // WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.high.signed.v4i32( + // WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y) + // WEBASSEMBLY-NEXT: ret +} + +u32x4 extmul_low_i16x8_u_i32x4(u16x8 x, u16x8 y) { + return 
__builtin_wasm_extmul_low_i16x8_u_i32x4(x, y); + // WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.low.unsigned.v4i32( + // WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y) + // WEBASSEMBLY-NEXT: ret +} + +u32x4 extmul_high_i16x8_u_i32x4(u16x8 x, u16x8 y) { + return __builtin_wasm_extmul_high_i16x8_u_i32x4(x, y); + // WEBASSEMBLY: call <4 x i32> @llvm.wasm.extmul.high.unsigned.v4i32( + // WEBASSEMBLY-SAME: <8 x i16> %x, <8 x i16> %y) + // WEBASSEMBLY-NEXT: ret +} + +i64x2 extmul_low_i32x4_s_i64x2(i32x4 x, i32x4 y) { + return __builtin_wasm_extmul_low_i32x4_s_i64x2(x, y); + // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.low.signed.v2i64( + // WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y) + // WEBASSEMBLY-NEXT: ret +} + +i64x2 extmul_high_i32x4_s_i64x2(i32x4 x, i32x4 y) { + return __builtin_wasm_extmul_high_i32x4_s_i64x2(x, y); + // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.high.signed.v2i64( + // WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y) + // WEBASSEMBLY-NEXT: ret +} + +u64x2 extmul_low_i32x4_u_i64x2(u32x4 x, u32x4 y) { + return __builtin_wasm_extmul_low_i32x4_u_i64x2(x, y); + // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.low.unsigned.v2i64( + // WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y) + // WEBASSEMBLY-NEXT: ret +} + +u64x2 extmul_high_i32x4_u_i64x2(u32x4 x, u32x4 y) { + return __builtin_wasm_extmul_high_i32x4_u_i64x2(x, y); + // WEBASSEMBLY: call <2 x i64> @llvm.wasm.extmul.high.unsigned.v2i64( + // WEBASSEMBLY-SAME: <4 x i32> %x, <4 x i32> %y) + // WEBASSEMBLY-NEXT: ret +} + i32x4 dot_i16x8_s(i16x8 x, i16x8 y) { return __builtin_wasm_dot_s_i32x4_i16x8(x, y); // WEBASSEMBLY: call <4 x i32> @llvm.wasm.dot(<8 x i16> %x, <8 x i16> %y) diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td --- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td +++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td @@ -259,6 +259,23 @@ def int_wasm_popcnt : Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty], [IntrNoMem, 
IntrSpeculatable]>; +def int_wasm_extmul_low_signed : + Intrinsic<[llvm_anyvector_ty], + [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_extmul_high_signed : + Intrinsic<[llvm_anyvector_ty], + [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_extmul_low_unsigned : + Intrinsic<[llvm_anyvector_ty], + [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>], + [IntrNoMem, IntrSpeculatable]>; +def int_wasm_extmul_high_unsigned : + Intrinsic<[llvm_anyvector_ty], + [LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>], + [IntrNoMem, IntrSpeculatable]>; + //===----------------------------------------------------------------------===// // Thread-local storage intrinsics //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp @@ -62,12 +62,16 @@ uint64_t Start = OS.tell(); uint64_t Binary = getBinaryCodeForInstr(MI, Fixups, STI); - if (Binary <= UINT8_MAX) { + if (Binary < (1 << 8)) { OS << uint8_t(Binary); - } else { - assert(Binary <= UINT16_MAX && "Several-byte opcodes not supported yet"); + } else if (Binary < (1 << 16)) { OS << uint8_t(Binary >> 8); encodeULEB128(uint8_t(Binary), OS); + } else if (Binary < (1 << 24)) { + OS << uint8_t(Binary >> 16); + encodeULEB128(uint16_t(Binary), OS); + } else { + llvm_unreachable("Very large (prefix + 3 byte) opcodes not supported"); } // For br_table instructions, encode the size of the table. 
In the MCInst, diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -16,7 +16,9 @@ list pattern_r, string asmstr_r = "", string asmstr_s = "", bits<32> simdop = -1> { defm "" : I, + !if(!ge(simdop, 0x100), + !or(0xfd0000, !and(0xffff, simdop)), + !or(0xfd00, !and(0xff, simdop)))>, Requires<[HasSIMD128]>; } @@ -935,6 +937,57 @@ "i32x4.dot_i16x8_s\t$dst, $lhs, $rhs", "i32x4.dot_i16x8_s", 186>; +// Extending multiplication: extmul_{low,high}_P +multiclass SIMDExtBinary simdop> { + defm _#vec_t : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), + (outs), (ins), + [(set (vec_t V128:$dst), + (node (arg_t V128:$lhs), (arg_t V128:$rhs)) + )], + vec#"."#name#"\t$dst, $lhs, $rhs", vec#"."#name, + simdop>; +} + +defm EXTMUL_LOW_S : + SIMDExtBinary; +defm EXTMUL_HIGH_S : + SIMDExtBinary; +defm EXTMUL_LOW_U : + SIMDExtBinary; +defm EXTMUL_HIGH_U : + SIMDExtBinary; + +defm EXTMUL_LOW_S : + SIMDExtBinary; +defm EXTMUL_HIGH_S : + SIMDExtBinary; +defm EXTMUL_LOW_U : + SIMDExtBinary; +defm EXTMUL_HIGH_U : + SIMDExtBinary; + +defm EXTMUL_LOW_S : + SIMDExtBinary; +defm EXTMUL_HIGH_S : + SIMDExtBinary; +defm EXTMUL_LOW_U : + SIMDExtBinary; +defm EXTMUL_HIGH_U : + SIMDExtBinary; + //===----------------------------------------------------------------------===// // Floating-point unary arithmetic //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll --- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -249,6 +249,54 @@ ret <8 x i16> %a } +; CHECK-LABEL: extmul_low_s_v8i16: +; SIMD128-NEXT: .functype extmul_low_s_v8i16 (v128, v128) -> (v128){{$}} +; SIMD128-NEXT: i16x8.extmul_low_i8x16_s
$push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <8 x i16> @llvm.wasm.extmul.low.signed.v8i16(<16 x i8>, <16 x i8>) +define <8 x i16> @extmul_low_s_v8i16(<16 x i8> %x, <16 x i8> %y) { + %a = call <8 x i16> @llvm.wasm.extmul.low.signed.v8i16( + <16 x i8> %x, <16 x i8> %y + ) + ret <8 x i16> %a +} + +; CHECK-LABEL: extmul_high_s_v8i16: +; SIMD128-NEXT: .functype extmul_high_s_v8i16 (v128, v128) -> (v128){{$}} +; SIMD128-NEXT: i16x8.extmul_high_i8x16_s $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <8 x i16> @llvm.wasm.extmul.high.signed.v8i16(<16 x i8>, <16 x i8>) +define <8 x i16> @extmul_high_s_v8i16(<16 x i8> %x, <16 x i8> %y) { + %a = call <8 x i16> @llvm.wasm.extmul.high.signed.v8i16( + <16 x i8> %x, <16 x i8> %y + ) + ret <8 x i16> %a +} + +; CHECK-LABEL: extmul_low_u_v8i16: +; SIMD128-NEXT: .functype extmul_low_u_v8i16 (v128, v128) -> (v128){{$}} +; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <8 x i16> @llvm.wasm.extmul.low.unsigned.v8i16(<16 x i8>, <16 x i8>) +define <8 x i16> @extmul_low_u_v8i16(<16 x i8> %x, <16 x i8> %y) { + %a = call <8 x i16> @llvm.wasm.extmul.low.unsigned.v8i16( + <16 x i8> %x, <16 x i8> %y + ) + ret <8 x i16> %a +} + +; CHECK-LABEL: extmul_high_u_v8i16: +; SIMD128-NEXT: .functype extmul_high_u_v8i16 (v128, v128) -> (v128){{$}} +; SIMD128-NEXT: i16x8.extmul_high_i8x16_u $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <8 x i16> @llvm.wasm.extmul.high.unsigned.v8i16(<16 x i8>, <16 x i8>) +define <8 x i16> @extmul_high_u_v8i16(<16 x i8> %x, <16 x i8> %y) { + %a = call <8 x i16> @llvm.wasm.extmul.high.unsigned.v8i16( + <16 x i8> %x, <16 x i8> %y + ) + ret <8 x i16> %a +} + ; CHECK-LABEL: any_v8i16: ; SIMD128-NEXT: .functype any_v8i16 (v128) -> (i32){{$}} ; SIMD128-NEXT: i16x8.any_true $push[[R:[0-9]+]]=, $0{{$}} @@ -328,6 +376,55 @@ ret <4 x i32> %a } + +; CHECK-LABEL: 
extmul_low_s_v4i32: +; SIMD128-NEXT: .functype extmul_low_s_v4i32 (v128, v128) -> (v128){{$}} +; SIMD128-NEXT: i32x4.extmul_low_i16x8_s $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <4 x i32> @llvm.wasm.extmul.low.signed.v4i32(<8 x i16>, <8 x i16>) +define <4 x i32> @extmul_low_s_v4i32(<8 x i16> %x, <8 x i16> %y) { + %a = call <4 x i32> @llvm.wasm.extmul.low.signed.v4i32( + <8 x i16> %x, <8 x i16> %y + ) + ret <4 x i32> %a +} + +; CHECK-LABEL: extmul_high_s_v4i32: +; SIMD128-NEXT: .functype extmul_high_s_v4i32 (v128, v128) -> (v128){{$}} +; SIMD128-NEXT: i32x4.extmul_high_i16x8_s $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <4 x i32> @llvm.wasm.extmul.high.signed.v4i32(<8 x i16>, <8 x i16>) +define <4 x i32> @extmul_high_s_v4i32(<8 x i16> %x, <8 x i16> %y) { + %a = call <4 x i32> @llvm.wasm.extmul.high.signed.v4i32( + <8 x i16> %x, <8 x i16> %y + ) + ret <4 x i32> %a +} + +; CHECK-LABEL: extmul_low_u_v4i32: +; SIMD128-NEXT: .functype extmul_low_u_v4i32 (v128, v128) -> (v128){{$}} +; SIMD128-NEXT: i32x4.extmul_low_i16x8_u $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <4 x i32> @llvm.wasm.extmul.low.unsigned.v4i32(<8 x i16>, <8 x i16>) +define <4 x i32> @extmul_low_u_v4i32(<8 x i16> %x, <8 x i16> %y) { + %a = call <4 x i32> @llvm.wasm.extmul.low.unsigned.v4i32( + <8 x i16> %x, <8 x i16> %y + ) + ret <4 x i32> %a +} + +; CHECK-LABEL: extmul_high_u_v4i32: +; SIMD128-NEXT: .functype extmul_high_u_v4i32 (v128, v128) -> (v128){{$}} +; SIMD128-NEXT: i32x4.extmul_high_i16x8_u $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <4 x i32> @llvm.wasm.extmul.high.unsigned.v4i32(<8 x i16>, <8 x i16>) +define <4 x i32> @extmul_high_u_v4i32(<8 x i16> %x, <8 x i16> %y) { + %a = call <4 x i32> @llvm.wasm.extmul.high.unsigned.v4i32( + <8 x i16> %x, <8 x i16> %y + ) + ret <4 x i32> %a +} + ; CHECK-LABEL: any_v4i32: ; SIMD128-NEXT: .functype any_v4i32 
(v128) -> (i32){{$}} ; SIMD128-NEXT: i32x4.any_true $push[[R:[0-9]+]]=, $0{{$}} @@ -395,6 +492,54 @@ ; ============================================================================== ; 2 x i64 ; ============================================================================== +; CHECK-LABEL: extmul_low_s_v2i64: +; SIMD128-NEXT: .functype extmul_low_s_v2i64 (v128, v128) -> (v128){{$}} +; SIMD128-NEXT: i64x2.extmul_low_i32x4_s $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <2 x i64> @llvm.wasm.extmul.low.signed.v2i64(<4 x i32>, <4 x i32>) +define <2 x i64> @extmul_low_s_v2i64(<4 x i32> %x, <4 x i32> %y) { + %a = call <2 x i64> @llvm.wasm.extmul.low.signed.v2i64( + <4 x i32> %x, <4 x i32> %y + ) + ret <2 x i64> %a +} + +; CHECK-LABEL: extmul_high_s_v2i64: +; SIMD128-NEXT: .functype extmul_high_s_v2i64 (v128, v128) -> (v128){{$}} +; SIMD128-NEXT: i64x2.extmul_high_i32x4_s $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <2 x i64> @llvm.wasm.extmul.high.signed.v2i64(<4 x i32>, <4 x i32>) +define <2 x i64> @extmul_high_s_v2i64(<4 x i32> %x, <4 x i32> %y) { + %a = call <2 x i64> @llvm.wasm.extmul.high.signed.v2i64( + <4 x i32> %x, <4 x i32> %y + ) + ret <2 x i64> %a +} + +; CHECK-LABEL: extmul_low_u_v2i64: +; SIMD128-NEXT: .functype extmul_low_u_v2i64 (v128, v128) -> (v128){{$}} +; SIMD128-NEXT: i64x2.extmul_low_i32x4_u $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <2 x i64> @llvm.wasm.extmul.low.unsigned.v2i64(<4 x i32>, <4 x i32>) +define <2 x i64> @extmul_low_u_v2i64(<4 x i32> %x, <4 x i32> %y) { + %a = call <2 x i64> @llvm.wasm.extmul.low.unsigned.v2i64( + <4 x i32> %x, <4 x i32> %y + ) + ret <2 x i64> %a +} + +; CHECK-LABEL: extmul_high_u_v2i64: +; SIMD128-NEXT: .functype extmul_high_u_v2i64 (v128, v128) -> (v128){{$}} +; SIMD128-NEXT: i64x2.extmul_high_i32x4_u $push[[R:[0-9]+]]=, $0, $1{{$}} +; SIMD128-NEXT: return $pop[[R]]{{$}} +declare <2 x i64> 
@llvm.wasm.extmul.high.unsigned.v2i64(<4 x i32>, <4 x i32>) +define <2 x i64> @extmul_high_u_v2i64(<4 x i32> %x, <4 x i32> %y) { + %a = call <2 x i64> @llvm.wasm.extmul.high.unsigned.v2i64( + <4 x i32> %x, <4 x i32> %y + ) + ret <2 x i64> %a +} + ; CHECK-LABEL: any_v2i64: ; SIMD128-NEXT: .functype any_v2i64 (v128) -> (i32){{$}} ; SIMD128-NEXT: i64x2.any_true $push[[R:[0-9]+]]=, $0{{$}} diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s --- a/llvm/test/MC/WebAssembly/simd-encodings.s +++ b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -658,4 +658,40 @@ # CHECK: f64x2.qfms # encoding: [0xfd,0xff,0x01] f64x2.qfms + # CHECK: i16x8.extmul_low_i8x16_s # encoding: [0xfd,0x9a,0x01] + i16x8.extmul_low_i8x16_s + + # CHECK: i16x8.extmul_high_i8x16_s # encoding: [0xfd,0x9d,0x01] + i16x8.extmul_high_i8x16_s + + # CHECK: i16x8.extmul_low_i8x16_u # encoding: [0xfd,0x9e,0x01] + i16x8.extmul_low_i8x16_u + + # CHECK: i16x8.extmul_high_i8x16_u # encoding: [0xfd,0x9f,0x01] + i16x8.extmul_high_i8x16_u + + # CHECK: i32x4.extmul_low_i16x8_s # encoding: [0xfd,0xbb,0x01] + i32x4.extmul_low_i16x8_s + + # CHECK: i32x4.extmul_high_i16x8_s # encoding: [0xfd,0xbd,0x01] + i32x4.extmul_high_i16x8_s + + # CHECK: i32x4.extmul_low_i16x8_u # encoding: [0xfd,0xbe,0x01] + i32x4.extmul_low_i16x8_u + + # CHECK: i32x4.extmul_high_i16x8_u # encoding: [0xfd,0xbf,0x01] + i32x4.extmul_high_i16x8_u + + # CHECK: i64x2.extmul_low_i32x4_s # encoding: [0xfd,0xd2,0x01] + i64x2.extmul_low_i32x4_s + + # CHECK: i64x2.extmul_high_i32x4_s # encoding: [0xfd,0xd3,0x01] + i64x2.extmul_high_i32x4_s + + # CHECK: i64x2.extmul_low_i32x4_u # encoding: [0xfd,0xd6,0x01] + i64x2.extmul_low_i32x4_u + + # CHECK: i64x2.extmul_high_i32x4_u # encoding: [0xfd,0xd7,0x01] + i64x2.extmul_high_i32x4_u + end_function diff --git a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp --- 
a/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp +++ b/llvm/utils/TableGen/WebAssemblyDisassemblerEmitter.cpp @@ -39,9 +39,15 @@ ->getValue()); if (Opc == 0xFFFFFFFF) continue; // No opcode defined. - assert(Opc <= 0xFFFF); - auto Prefix = Opc >> 8; - Opc = Opc & 0xFF; + assert(Opc <= 0xFFFFFF); + unsigned Prefix; + if (Opc <= 0xFFFF) { + Prefix = Opc >> 8; + Opc = Opc & 0xFF; + } else { + Prefix = Opc >> 16; + Opc = Opc & 0xFFFF; + } auto &CGIP = OpcodeTable[Prefix][Opc]; // All wasm instructions have a StackBased field of type string, we only // want the instructions for which this is "true".