This is an archive of the discontinued LLVM Phabricator instance.

Differential D84820

[WebAssembly] Implement prototype v128.load{32,64}_zero instructions
ClosedPublic

Authored by tlively on Jul 28 2020, 8:27 PM.

Download Raw Diff

Details

Reviewers

aheejin

Commits

rGcb327922101b: [WebAssembly] Implement prototype v128.load{32,64}_zero instructions

Summary

Specified in https://github.com/WebAssembly/simd/pull/237, these
instructions load the first vector lane from memory and zero the other
lanes. Since these instructions are not officially part of the SIMD
proposal, they are only available on an opt-in basis via LLVM
intrinsics and clang builtin functions. If these instructions are
merged to the proposal, this implementation will change so that the
instructions will be generated from normal IR. At that point the
intrinsics and builtin functions would be removed.

This PR also changes the opcodes for the experimental f32x4.qfm{a,s}
instructions because their opcodes conflicted with those of the
v128.load{32,64}_zero instructions. The new opcodes were chosen to
match those used in V8.

Diff Detail

Repository: rG LLVM Github Monorepo

Event Timeline

tlively created this revision. Jul 28 2020, 8:27 PM

Herald added projects: Restricted Project, Restricted Project. · View Herald Transcript · Jul 28 2020, 8:27 PM

Herald added subscribers: llvm-commits, cfe-commits, sunfish and 4 others. · View Herald Transcript

tlively requested review of this revision. Jul 28 2020, 8:27 PM

Harbormaster completed remote builds in B66150: Diff 281455. Jul 28 2020, 10:11 PM

Since this changes opcodes, it needs to be landed in concert with the corresponding Binaryen change.

aheejin accepted this revision. Jul 30 2020, 1:50 AM

aheejin added inline comments.

llvm/include/llvm/IR/IntrinsicsWebAssembly.td
198	Can memory accesses be speculatable? The same question applies to the intrinsic below.

This revision is now accepted and ready to land. Jul 30 2020, 1:50 AM

Remove IntrSpeculatable

Harbormaster completed remote builds in B66448: Diff 282017. Jul 30 2020, 1:20 PM

tlively added inline comments. Jul 30 2020, 5:42 PM

llvm/include/llvm/IR/IntrinsicsWebAssembly.td
198	Hmm, maybe not, and it's definitely more conservative for them not to be.

Renumber i32x4.dot_i16x8_s to match V8 as well

Harbormaster completed remote builds in B66804: Diff 282680. Aug 3 2020, 11:47 AM

This revision was landed with ongoing or failed builds. Aug 3 2020, 1:54 PM

Closed by commit rGcb327922101b: [WebAssembly] Implement prototype v128.load{32,64}_zero instructions (authored by tlively). · Explain Why

This revision was automatically updated to reflect the committed changes.

tlively added a commit: rGcb327922101b: [WebAssembly] Implement prototype v128.load{32,64}_zero instructions.

Revision Contents

Path

Size

clang/

include/

clang/

Basic/

BuiltinsWebAssembly.def

3 lines

lib/

CodeGen/

CGBuiltin.cpp

10 lines

test/

CodeGen/

builtins-wasm.c

12 lines

llvm/

include/

llvm/

IR/

IntrinsicsWebAssembly.td

14 lines

lib/

Target/

WebAssembly/

MCTargetDesc/

WebAssemblyMCTargetDesc.h

2 lines

WebAssemblyISelLowering.cpp

9 lines

WebAssemblyInstrMemory.td

2 lines

WebAssemblyInstrSIMD.td

50 lines

test/

CodeGen/

WebAssembly/

simd-load-zero-offset.ll

228 lines

MC/

WebAssembly/

simd-encodings.s

16 lines

Diff 282719

clang/include/clang/Basic/BuiltinsWebAssembly.def

	Show First 20 Lines • Show All 163 Lines • ▼ Show 20 Lines
	TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i32x4_f32x4, "V4iV4f", "nc", "simd128")			TARGET_BUILTIN(__builtin_wasm_trunc_saturate_s_i32x4_f32x4, "V4iV4f", "nc", "simd128")
	TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i32x4_f32x4, "V4iV4f", "nc", "simd128")			TARGET_BUILTIN(__builtin_wasm_trunc_saturate_u_i32x4_f32x4, "V4iV4f", "nc", "simd128")

	TARGET_BUILTIN(__builtin_wasm_narrow_s_i8x16_i16x8, "V16cV8sV8s", "nc", "simd128")			TARGET_BUILTIN(__builtin_wasm_narrow_s_i8x16_i16x8, "V16cV8sV8s", "nc", "simd128")
	TARGET_BUILTIN(__builtin_wasm_narrow_u_i8x16_i16x8, "V16cV8sV8s", "nc", "simd128")			TARGET_BUILTIN(__builtin_wasm_narrow_u_i8x16_i16x8, "V16cV8sV8s", "nc", "simd128")
	TARGET_BUILTIN(__builtin_wasm_narrow_s_i16x8_i32x4, "V8sV4iV4i", "nc", "simd128")			TARGET_BUILTIN(__builtin_wasm_narrow_s_i16x8_i32x4, "V8sV4iV4i", "nc", "simd128")
	TARGET_BUILTIN(__builtin_wasm_narrow_u_i16x8_i32x4, "V8sV4iV4i", "nc", "simd128")			TARGET_BUILTIN(__builtin_wasm_narrow_u_i16x8_i32x4, "V8sV4iV4i", "nc", "simd128")

				TARGET_BUILTIN(__builtin_wasm_load32_zero, "V4ii*", "nU", "simd128")
				TARGET_BUILTIN(__builtin_wasm_load64_zero, "V2LLiLLi*", "nU", "simd128")

	#undef BUILTIN			#undef BUILTIN
	#undef TARGET_BUILTIN			#undef TARGET_BUILTIN

clang/lib/CodeGen/CGBuiltin.cpp

This file is larger than 256 KB, so syntax highlighting is disabled by default.

Show First 20 Lines • Show All 16,491 Lines • ▼ Show 20 Lines	case WebAssembly::BI__builtin_wasm_narrow_u_i16x8_i32x4:
break;		break;
default:		default:
llvm_unreachable("unexpected builtin ID");		llvm_unreachable("unexpected builtin ID");
}		}
Function *Callee =		Function *Callee =
CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});		CGM.getIntrinsic(IntNo, {ConvertType(E->getType()), Low->getType()});
return Builder.CreateCall(Callee, {Low, High});		return Builder.CreateCall(Callee, {Low, High});
}		}
		case WebAssembly::BI__builtin_wasm_load32_zero: {
		Value *Ptr = EmitScalarExpr(E->getArg(0));
		Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_load32_zero);
		return Builder.CreateCall(Callee, {Ptr});
		}
		case WebAssembly::BI__builtin_wasm_load64_zero: {
		Value *Ptr = EmitScalarExpr(E->getArg(0));
		Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_load64_zero);
		return Builder.CreateCall(Callee, {Ptr});
		}
case WebAssembly::BI__builtin_wasm_shuffle_v8x16: {		case WebAssembly::BI__builtin_wasm_shuffle_v8x16: {
Value *Ops[18];		Value *Ops[18];
size_t OpIdx = 0;		size_t OpIdx = 0;
Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));		Ops[OpIdx++] = EmitScalarExpr(E->getArg(0));
Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));		Ops[OpIdx++] = EmitScalarExpr(E->getArg(1));
while (OpIdx < 18) {		while (OpIdx < 18) {
Optional<llvm::APSInt> LaneConst =		Optional<llvm::APSInt> LaneConst =
E->getArg(OpIdx)->getIntegerConstantExpr(getContext());		E->getArg(OpIdx)->getIntegerConstantExpr(getContext());
▲ Show 20 Lines • Show All 254 Lines • Show Last 20 Lines

clang/test/CodeGen/builtins-wasm.c

	Show First 20 Lines • Show All 731 Lines • ▼ Show 20 Lines

	i16x8 narrow_u_i16x8_i32x4(i32x4 low, i32x4 high) {			i16x8 narrow_u_i16x8_i32x4(i32x4 low, i32x4 high) {
	return __builtin_wasm_narrow_u_i16x8_i32x4(low, high);			return __builtin_wasm_narrow_u_i16x8_i32x4(low, high);
	// WEBASSEMBLY: call <8 x i16> @llvm.wasm.narrow.unsigned.v8i16.v4i32(			// WEBASSEMBLY: call <8 x i16> @llvm.wasm.narrow.unsigned.v8i16.v4i32(
	// WEBASSEMBLY-SAME: <4 x i32> %low, <4 x i32> %high)			// WEBASSEMBLY-SAME: <4 x i32> %low, <4 x i32> %high)
	// WEBASSEMBLY: ret			// WEBASSEMBLY: ret
	}			}

				i32x4 load32_zero(int *p) {
				return __builtin_wasm_load32_zero(p);
				// WEBASSEMBLY: call <4 x i32> @llvm.wasm.load32.zero(i32* %p)
				// WEBASSEMBLY: ret
				}

				i64x2 load64_zero(long long *p) {
				return __builtin_wasm_load64_zero(p);
				// WEBASSEMBLY: call <2 x i64> @llvm.wasm.load64.zero(i64* %p)
				// WEBASSEMBLY: ret
				}

	i8x16 swizzle_v8x16(i8x16 x, i8x16 y) {			i8x16 swizzle_v8x16(i8x16 x, i8x16 y) {
	return __builtin_wasm_swizzle_v8x16(x, y);			return __builtin_wasm_swizzle_v8x16(x, y);
	// WEBASSEMBLY: call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %x, <16 x i8> %y)			// WEBASSEMBLY: call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %x, <16 x i8> %y)
	}			}

	i8x16 shuffle(i8x16 x, i8x16 y) {			i8x16 shuffle(i8x16 x, i8x16 y) {
	return __builtin_wasm_shuffle_v8x16(x, y, 0, 1, 2, 3, 4, 5, 6, 7,			return __builtin_wasm_shuffle_v8x16(x, y, 0, 1, 2, 3, 4, 5, 6, 7,
	8, 9, 10, 11, 12, 13, 14, 15);			8, 9, 10, 11, 12, 13, 14, 15);
	// WEBASSEMBLY: call <16 x i8> @llvm.wasm.shuffle(<16 x i8> %x, <16 x i8> %y,			// WEBASSEMBLY: call <16 x i8> @llvm.wasm.shuffle(<16 x i8> %x, <16 x i8> %y,
	// WEBASSEMBLY-SAME: i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,			// WEBASSEMBLY-SAME: i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
	// WEBASSEMBLY-SAME: i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14,			// WEBASSEMBLY-SAME: i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14,
	// WEBASSEMBLY-SAME: i32 15			// WEBASSEMBLY-SAME: i32 15
	// WEBASSEMBLY-NEXT: ret			// WEBASSEMBLY-NEXT: ret
	}			}

llvm/include/llvm/IR/IntrinsicsWebAssembly.td

Show First 20 Lines • Show All 184 Lines • ▼ Show 20 Lines	def int_wasm_trunc :
Intrinsic<[llvm_anyvector_ty],		Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>],		[LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;		[IntrNoMem, IntrSpeculatable]>;
def int_wasm_nearest :		def int_wasm_nearest :
Intrinsic<[llvm_anyvector_ty],		Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>],		[LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;		[IntrNoMem, IntrSpeculatable]>;

		// TODO: Replace these intrinsics with normal ISel patterns once the
		// load_zero instructions are merged to the proposal.
		def int_wasm_load32_zero :
		Intrinsic<[llvm_v4i32_ty],
		[LLVMPointerType<llvm_i32_ty>],
		[IntrReadMem, IntrArgMemOnly],
		[Inline comment] aheejin (Unsubmitted, Not Done): Can memory accesses be speculatable? The same question applies to the intrinsic below.
		[Inline comment] tlively (Author, Unsubmitted, Done): Hmm, maybe not, and it's definitely more conservative for them not to be.
		"", [SDNPMemOperand]>;

		def int_wasm_load64_zero :
		Intrinsic<[llvm_v2i64_ty],
		[LLVMPointerType<llvm_i64_ty>],
		[IntrReadMem, IntrArgMemOnly],
		"", [SDNPMemOperand]>;

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Thread-local storage intrinsics		// Thread-local storage intrinsics
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

def int_wasm_tls_size :		def int_wasm_tls_size :
Intrinsic<[llvm_anyint_ty],		Intrinsic<[llvm_anyint_ty],
[],		[],
[IntrNoMem, IntrSpeculatable]>;		[IntrNoMem, IntrSpeculatable]>;
Show All 12 Lines

llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h

Show First 20 Lines • Show All 226 Lines • ▼ Show 20 Lines	#define WASM_LOAD_STORE(NAME) \
WASM_LOAD_STORE(ATOMIC_RMW32_U_XOR_I64)		WASM_LOAD_STORE(ATOMIC_RMW32_U_XOR_I64)
WASM_LOAD_STORE(ATOMIC_RMW_XCHG_I32)		WASM_LOAD_STORE(ATOMIC_RMW_XCHG_I32)
WASM_LOAD_STORE(ATOMIC_RMW32_U_XCHG_I64)		WASM_LOAD_STORE(ATOMIC_RMW32_U_XCHG_I64)
WASM_LOAD_STORE(ATOMIC_RMW_CMPXCHG_I32)		WASM_LOAD_STORE(ATOMIC_RMW_CMPXCHG_I32)
WASM_LOAD_STORE(ATOMIC_RMW32_U_CMPXCHG_I64)		WASM_LOAD_STORE(ATOMIC_RMW32_U_CMPXCHG_I64)
WASM_LOAD_STORE(ATOMIC_NOTIFY)		WASM_LOAD_STORE(ATOMIC_NOTIFY)
WASM_LOAD_STORE(ATOMIC_WAIT_I32)		WASM_LOAD_STORE(ATOMIC_WAIT_I32)
WASM_LOAD_STORE(LOAD_SPLAT_v32x4)		WASM_LOAD_STORE(LOAD_SPLAT_v32x4)
		WASM_LOAD_STORE(LOAD_ZERO_v4i32)
return 2;		return 2;
WASM_LOAD_STORE(LOAD_I64)		WASM_LOAD_STORE(LOAD_I64)
WASM_LOAD_STORE(LOAD_F64)		WASM_LOAD_STORE(LOAD_F64)
WASM_LOAD_STORE(STORE_I64)		WASM_LOAD_STORE(STORE_I64)
WASM_LOAD_STORE(STORE_F64)		WASM_LOAD_STORE(STORE_F64)
WASM_LOAD_STORE(ATOMIC_LOAD_I64)		WASM_LOAD_STORE(ATOMIC_LOAD_I64)
WASM_LOAD_STORE(ATOMIC_STORE_I64)		WASM_LOAD_STORE(ATOMIC_STORE_I64)
WASM_LOAD_STORE(ATOMIC_RMW_ADD_I64)		WASM_LOAD_STORE(ATOMIC_RMW_ADD_I64)
WASM_LOAD_STORE(ATOMIC_RMW_SUB_I64)		WASM_LOAD_STORE(ATOMIC_RMW_SUB_I64)
WASM_LOAD_STORE(ATOMIC_RMW_AND_I64)		WASM_LOAD_STORE(ATOMIC_RMW_AND_I64)
WASM_LOAD_STORE(ATOMIC_RMW_OR_I64)		WASM_LOAD_STORE(ATOMIC_RMW_OR_I64)
WASM_LOAD_STORE(ATOMIC_RMW_XOR_I64)		WASM_LOAD_STORE(ATOMIC_RMW_XOR_I64)
WASM_LOAD_STORE(ATOMIC_RMW_XCHG_I64)		WASM_LOAD_STORE(ATOMIC_RMW_XCHG_I64)
WASM_LOAD_STORE(ATOMIC_RMW_CMPXCHG_I64)		WASM_LOAD_STORE(ATOMIC_RMW_CMPXCHG_I64)
WASM_LOAD_STORE(ATOMIC_WAIT_I64)		WASM_LOAD_STORE(ATOMIC_WAIT_I64)
WASM_LOAD_STORE(LOAD_SPLAT_v64x2)		WASM_LOAD_STORE(LOAD_SPLAT_v64x2)
WASM_LOAD_STORE(LOAD_EXTEND_S_v8i16)		WASM_LOAD_STORE(LOAD_EXTEND_S_v8i16)
WASM_LOAD_STORE(LOAD_EXTEND_U_v8i16)		WASM_LOAD_STORE(LOAD_EXTEND_U_v8i16)
WASM_LOAD_STORE(LOAD_EXTEND_S_v4i32)		WASM_LOAD_STORE(LOAD_EXTEND_S_v4i32)
WASM_LOAD_STORE(LOAD_EXTEND_U_v4i32)		WASM_LOAD_STORE(LOAD_EXTEND_U_v4i32)
WASM_LOAD_STORE(LOAD_EXTEND_S_v2i64)		WASM_LOAD_STORE(LOAD_EXTEND_S_v2i64)
WASM_LOAD_STORE(LOAD_EXTEND_U_v2i64)		WASM_LOAD_STORE(LOAD_EXTEND_U_v2i64)
		WASM_LOAD_STORE(LOAD_ZERO_v2i64)
return 3;		return 3;
WASM_LOAD_STORE(LOAD_V128)		WASM_LOAD_STORE(LOAD_V128)
WASM_LOAD_STORE(STORE_V128)		WASM_LOAD_STORE(STORE_V128)
return 4;		return 4;
default:		default:
return -1;		return -1;
}		}
#undef WASM_LOAD_STORE		#undef WASM_LOAD_STORE
▲ Show 20 Lines • Show All 140 Lines • Show Last 20 Lines

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

Show First 20 Lines • Show All 669 Lines • ▼ Show 20 Lines	bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
case Intrinsic::wasm_atomic_wait_i64:		case Intrinsic::wasm_atomic_wait_i64:
Info.opc = ISD::INTRINSIC_W_CHAIN;		Info.opc = ISD::INTRINSIC_W_CHAIN;
Info.memVT = MVT::i64;		Info.memVT = MVT::i64;
Info.ptrVal = I.getArgOperand(0);		Info.ptrVal = I.getArgOperand(0);
Info.offset = 0;		Info.offset = 0;
Info.align = Align(8);		Info.align = Align(8);
Info.flags = MachineMemOperand::MOVolatile \| MachineMemOperand::MOLoad;		Info.flags = MachineMemOperand::MOVolatile \| MachineMemOperand::MOLoad;
return true;		return true;
		case Intrinsic::wasm_load32_zero:
		case Intrinsic::wasm_load64_zero:
		Info.opc = ISD::INTRINSIC_W_CHAIN;
		Info.memVT = Intrinsic == Intrinsic::wasm_load32_zero ? MVT::i32 : MVT::i64;
		Info.ptrVal = I.getArgOperand(0);
		Info.offset = 0;
		Info.align = Info.memVT == MVT::i32 ? Align(4) : Align(8);
		Info.flags = MachineMemOperand::MOLoad;
		return true;
default:		default:
return false;		return false;
}		}
}		}

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// WebAssembly Lowering private implementation.		// WebAssembly Lowering private implementation.
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
▲ Show 20 Lines • Show All 1,122 Lines • Show Last 20 Lines

llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td

	Show First 20 Lines • Show All 64 Lines • ▼ Show 20 Lines
	defm LOAD_I64 : WebAssemblyLoad<I64, "i64.load", 0x29, []>;			defm LOAD_I64 : WebAssemblyLoad<I64, "i64.load", 0x29, []>;
	defm LOAD_F32 : WebAssemblyLoad<F32, "f32.load", 0x2a, []>;			defm LOAD_F32 : WebAssemblyLoad<F32, "f32.load", 0x2a, []>;
	defm LOAD_F64 : WebAssemblyLoad<F64, "f64.load", 0x2b, []>;			defm LOAD_F64 : WebAssemblyLoad<F64, "f64.load", 0x2b, []>;

	// Select loads with no constant offset.			// Select loads with no constant offset.
	multiclass LoadPatNoOffset<ValueType ty, PatFrag kind, string inst> {			multiclass LoadPatNoOffset<ValueType ty, PatFrag kind, string inst> {
	def : Pat<(ty (kind I32:$addr)), (!cast<NI>(inst # "_A32") 0, 0, I32:$addr)>,			def : Pat<(ty (kind I32:$addr)), (!cast<NI>(inst # "_A32") 0, 0, I32:$addr)>,
	Requires<[HasAddr32]>;			Requires<[HasAddr32]>;
	def : Pat<(ty (kind I64:$addr)), (!cast<NI>(inst # "_A64") 0, 0, I64:$addr)>,			def : Pat<(ty (kind (i64 I64:$addr))), (!cast<NI>(inst # "_A64") 0, 0, I64:$addr)>,
	Requires<[HasAddr64]>;			Requires<[HasAddr64]>;
	}			}

	defm : LoadPatNoOffset<i32, load, "LOAD_I32">;			defm : LoadPatNoOffset<i32, load, "LOAD_I32">;
	defm : LoadPatNoOffset<i64, load, "LOAD_I64">;			defm : LoadPatNoOffset<i64, load, "LOAD_I64">;
	defm : LoadPatNoOffset<f32, load, "LOAD_F32">;			defm : LoadPatNoOffset<f32, load, "LOAD_F32">;
	defm : LoadPatNoOffset<f64, load, "LOAD_F64">;			defm : LoadPatNoOffset<f64, load, "LOAD_F64">;

	▲ Show 20 Lines • Show All 307 Lines • Show Last 20 Lines

llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

Show First 20 Lines • Show All 157 Lines • ▼ Show 20 Lines
defm : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), or_is_add,		defm : LoadPatImmOff<types[0], !cast<PatFrag>(exts[0]#types[1]), or_is_add,
"LOAD_EXTEND"#exts[1]#"_"#types[0]>;		"LOAD_EXTEND"#exts[1]#"_"#types[0]>;
defm : LoadPatOffsetOnly<types[0], !cast<PatFrag>(exts[0]#types[1]),		defm : LoadPatOffsetOnly<types[0], !cast<PatFrag>(exts[0]#types[1]),
"LOAD_EXTEND"#exts[1]#"_"#types[0]>;		"LOAD_EXTEND"#exts[1]#"_"#types[0]>;
defm : LoadPatGlobalAddrOffOnly<types[0], !cast<PatFrag>(exts[0]#types[1]),		defm : LoadPatGlobalAddrOffOnly<types[0], !cast<PatFrag>(exts[0]#types[1]),
"LOAD_EXTEND"#exts[1]#"_"#types[0]>;		"LOAD_EXTEND"#exts[1]#"_"#types[0]>;
}		}

		// Load lane into zero vector
		multiclass SIMDLoadZero<ValueType vec_t, string name, bits<32> simdop> {
		let mayLoad = 1, UseNamedOperandTable = 1 in {
		defm LOAD_ZERO_#vec_t#_A32 :
		SIMD_I<(outs V128:$dst),
		(ins P2Align:$p2align, offset32_op:$off, I32:$addr),
		(outs), (ins P2Align:$p2align, offset32_op:$off), [],
		name#"\t$dst, ${off}(${addr})$p2align",
		name#"\t$off$p2align", simdop>;
		defm LOAD_ZERO_#vec_t#_A64 :
		SIMD_I<(outs V128:$dst),
		(ins P2Align:$p2align, offset64_op:$off, I64:$addr),
		(outs), (ins P2Align:$p2align, offset64_op:$off), [],
		name#"\t$dst, ${off}(${addr})$p2align",
		name#"\t$off$p2align", simdop>;
		} // mayLoad = 1, UseNamedOperandTable = 1
		}

		// TODO: Also support v4f32 and v2f64 once the instructions are merged
		// to the proposal.
		defm "" : SIMDLoadZero<v4i32, "v128.load32_zero", 252>;
		defm "" : SIMDLoadZero<v2i64, "v128.load64_zero", 253>;

		defm : LoadPatNoOffset<v4i32, int_wasm_load32_zero, "LOAD_ZERO_v4i32">;
		defm : LoadPatNoOffset<v2i64, int_wasm_load64_zero, "LOAD_ZERO_v2i64">;

		defm : LoadPatImmOff<v4i32, int_wasm_load32_zero, regPlusImm, "LOAD_ZERO_v4i32">;
		defm : LoadPatImmOff<v2i64, int_wasm_load64_zero, regPlusImm, "LOAD_ZERO_v2i64">;

		defm : LoadPatImmOff<v4i32, int_wasm_load32_zero, or_is_add, "LOAD_ZERO_v4i32">;
		defm : LoadPatImmOff<v2i64, int_wasm_load64_zero, or_is_add, "LOAD_ZERO_v2i64">;

		defm : LoadPatOffsetOnly<v4i32, int_wasm_load32_zero, "LOAD_ZERO_v4i32">;
		defm : LoadPatOffsetOnly<v2i64, int_wasm_load64_zero, "LOAD_ZERO_v2i64">;

		defm : LoadPatGlobalAddrOffOnly<v4i32, int_wasm_load32_zero, "LOAD_ZERO_v4i32">;
		defm : LoadPatGlobalAddrOffOnly<v2i64, int_wasm_load64_zero, "LOAD_ZERO_v2i64">;

// Store: v128.store		// Store: v128.store
let mayStore = 1, UseNamedOperandTable = 1 in {		let mayStore = 1, UseNamedOperandTable = 1 in {
defm STORE_V128_A32 :		defm STORE_V128_A32 :
SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec),		SIMD_I<(outs), (ins P2Align:$p2align, offset32_op:$off, I32:$addr, V128:$vec),
(outs), (ins P2Align:$p2align, offset32_op:$off), [],		(outs), (ins P2Align:$p2align, offset32_op:$off), [],
"v128.store\t${off}(${addr})$p2align, $vec",		"v128.store\t${off}(${addr})$p2align, $vec",
"v128.store\t$off$p2align", 11>;		"v128.store\t$off$p2align", 11>;
▲ Show 20 Lines • Show All 621 Lines • ▼ Show 20 Lines	def : Pat<(wasm_shr_u
),		),
(!cast<NI>("AVGR_U_"#nodes[0]) V128:$lhs, V128:$rhs)>;		(!cast<NI>("AVGR_U_"#nodes[0]) V128:$lhs, V128:$rhs)>;

// Widening dot product: i32x4.dot_i16x8_s		// Widening dot product: i32x4.dot_i16x8_s
let isCommutable = 1 in		let isCommutable = 1 in
defm DOT : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),		defm DOT : SIMD_I<(outs V128:$dst), (ins V128:$lhs, V128:$rhs), (outs), (ins),
[(set V128:$dst, (int_wasm_dot V128:$lhs, V128:$rhs))],		[(set V128:$dst, (int_wasm_dot V128:$lhs, V128:$rhs))],
"i32x4.dot_i16x8_s\t$dst, $lhs, $rhs", "i32x4.dot_i16x8_s",		"i32x4.dot_i16x8_s\t$dst, $lhs, $rhs", "i32x4.dot_i16x8_s",
180>;		186>;

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Floating-point unary arithmetic		// Floating-point unary arithmetic
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

multiclass SIMDUnaryFP<SDNode node, string name, bits<32> baseInst> {		multiclass SIMDUnaryFP<SDNode node, string name, bits<32> baseInst> {
defm "" : SIMDUnary<v4f32, "f32x4", node, name, baseInst>;		defm "" : SIMDUnary<v4f32, "f32x4", node, name, baseInst>;
defm "" : SIMDUnary<v2f64, "f64x2", node, name, !add(baseInst, 12)>;		defm "" : SIMDUnary<v2f64, "f64x2", node, name, !add(baseInst, 12)>;
▲ Show 20 Lines • Show All 221 Lines • ▼ Show 20 Lines	foreach t2 = !foldl(
)		)
) in		) in
def : Pat<(t1 (bitconvert (t2 V128:$v))), (t1 V128:$v)>;		def : Pat<(t1 (bitconvert (t2 V128:$v))), (t1 V128:$v)>;

//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//
// Quasi-Fused Multiply- Add and Subtract (QFMA/QFMS)		// Quasi-Fused Multiply- Add and Subtract (QFMA/QFMS)
//===----------------------------------------------------------------------===//		//===----------------------------------------------------------------------===//

multiclass SIMDQFM<ValueType vec_t, string vec, bits<32> baseInst> {		multiclass SIMDQFM<ValueType vec_t, string vec, bits<32> simdopA,
		bits<32> simdopS> {
defm QFMA_#vec_t :		defm QFMA_#vec_t :
SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),		SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
(outs), (ins),		(outs), (ins),
[(set (vec_t V128:$dst),		[(set (vec_t V128:$dst),
(int_wasm_qfma (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))],		(int_wasm_qfma (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))],
vec#".qfma\t$dst, $a, $b, $c", vec#".qfma", baseInst>;		vec#".qfma\t$dst, $a, $b, $c", vec#".qfma", simdopA>;
defm QFMS_#vec_t :		defm QFMS_#vec_t :
SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),		SIMD_I<(outs V128:$dst), (ins V128:$a, V128:$b, V128:$c),
(outs), (ins),		(outs), (ins),
[(set (vec_t V128:$dst),		[(set (vec_t V128:$dst),
(int_wasm_qfms (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))],		(int_wasm_qfms (vec_t V128:$a), (vec_t V128:$b), (vec_t V128:$c)))],
vec#".qfms\t$dst, $a, $b, $c", vec#".qfms", !add(baseInst, 1)>;		vec#".qfms\t$dst, $a, $b, $c", vec#".qfms", simdopS>;
}		}

defm "" : SIMDQFM<v4f32, "f32x4", 252>;		defm "" : SIMDQFM<v4f32, "f32x4", 180, 212>;
defm "" : SIMDQFM<v2f64, "f64x2", 254>;		defm "" : SIMDQFM<v2f64, "f64x2", 254, 255>;

llvm/test/CodeGen/WebAssembly/simd-load-zero-offset.ll

This file was added.

				; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
				; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 \| FileCheck %s

				; Test SIMD v128.load{32,64}_zero instructions

				target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
				target triple = "wasm32-unknown-unknown"

				declare <4 x i32> @llvm.wasm.load32.zero(i32*)
				declare <2 x i64> @llvm.wasm.load64.zero(i64*)

				;===----------------------------------------------------------------------------
				; v128.load32_zero
				;===----------------------------------------------------------------------------

				define <4 x i32> @load_zero_i32_no_offset(i32* %p) {
				; CHECK-LABEL: load_zero_i32_no_offset:
				; CHECK: .functype load_zero_i32_no_offset (i32) -> (v128)
				; CHECK-NEXT: # %bb.0:
				; CHECK-NEXT: local.get 0
				; CHECK-NEXT: v128.load32_zero 0
				; CHECK-NEXT: # fallthrough-return
				%v = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %p)
				ret <4 x i32> %v
				}

				define <4 x i32> @load_zero_i32_with_folded_offset(i32* %p) {
				; CHECK-LABEL: load_zero_i32_with_folded_offset:
				; CHECK: .functype load_zero_i32_with_folded_offset (i32) -> (v128)
				; CHECK-NEXT: # %bb.0:
				; CHECK-NEXT: local.get 0
				; CHECK-NEXT: v128.load32_zero 24
				; CHECK-NEXT: # fallthrough-return
				%q = ptrtoint i32* %p to i32
				%r = add nuw i32 %q, 24
				%s = inttoptr i32 %r to i32*
				%t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
				ret <4 x i32> %t
				}

				define <4 x i32> @load_zero_i32_with_folded_gep_offset(i32* %p) {
				; CHECK-LABEL: load_zero_i32_with_folded_gep_offset:
				; CHECK: .functype load_zero_i32_with_folded_gep_offset (i32) -> (v128)
				; CHECK-NEXT: # %bb.0:
				; CHECK-NEXT: local.get 0
				; CHECK-NEXT: v128.load32_zero 24
				; CHECK-NEXT: # fallthrough-return
				%s = getelementptr inbounds i32, i32* %p, i32 6
				%t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
				ret <4 x i32> %t
				}

				define <4 x i32> @load_zero_i32_with_unfolded_gep_negative_offset(i32* %p) {
				; CHECK-LABEL: load_zero_i32_with_unfolded_gep_negative_offset:
				; CHECK: .functype load_zero_i32_with_unfolded_gep_negative_offset (i32) -> (v128)
				; CHECK-NEXT: # %bb.0:
				; CHECK-NEXT: local.get 0
				; CHECK-NEXT: i32.const -24
				; CHECK-NEXT: i32.add
				; CHECK-NEXT: v128.load32_zero 0
				; CHECK-NEXT: # fallthrough-return
				%s = getelementptr inbounds i32, i32* %p, i32 -6
				%t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
				ret <4 x i32> %t
				}

				define <4 x i32> @load_zero_i32_with_unfolded_offset(i32* %p) {
				; CHECK-LABEL: load_zero_i32_with_unfolded_offset:
				; CHECK: .functype load_zero_i32_with_unfolded_offset (i32) -> (v128)
				; CHECK-NEXT: # %bb.0:
				; CHECK-NEXT: local.get 0
				; CHECK-NEXT: i32.const 24
				; CHECK-NEXT: i32.add
				; CHECK-NEXT: v128.load32_zero 0
				; CHECK-NEXT: # fallthrough-return
				%q = ptrtoint i32* %p to i32
				%r = add nsw i32 %q, 24
				%s = inttoptr i32 %r to i32*
				%t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
				ret <4 x i32> %t
				}

				define <4 x i32> @load_zero_i32_with_unfolded_gep_offset(i32* %p) {
				; CHECK-LABEL: load_zero_i32_with_unfolded_gep_offset:
				; CHECK: .functype load_zero_i32_with_unfolded_gep_offset (i32) -> (v128)
				; CHECK-NEXT: # %bb.0:
				; CHECK-NEXT: local.get 0
				; CHECK-NEXT: i32.const 24
				; CHECK-NEXT: i32.add
				; CHECK-NEXT: v128.load32_zero 0
				; CHECK-NEXT: # fallthrough-return
				%s = getelementptr i32, i32* %p, i32 6
				%t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
				ret <4 x i32> %t
				}

				define <4 x i32> @load_zero_i32_from_numeric_address() {
				; CHECK-LABEL: load_zero_i32_from_numeric_address:
				; CHECK: .functype load_zero_i32_from_numeric_address () -> (v128)
				; CHECK-NEXT: # %bb.0:
				; CHECK-NEXT: i32.const 0
				; CHECK-NEXT: v128.load32_zero 42
				; CHECK-NEXT: # fallthrough-return
				%s = inttoptr i32 42 to i32*
				%t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
				ret <4 x i32> %t
				}

				@gv_i32 = global i32 0
				define <4 x i32> @load_zero_i32_from_global_address() {
				; CHECK-LABEL: load_zero_i32_from_global_address:
				; CHECK: .functype load_zero_i32_from_global_address () -> (v128)
				; CHECK-NEXT: # %bb.0:
				; CHECK-NEXT: i32.const 0
				; CHECK-NEXT: v128.load32_zero gv_i32
				; CHECK-NEXT: # fallthrough-return
				%t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* @gv_i32)
				ret <4 x i32> %t
				}

				;===----------------------------------------------------------------------------
				; v128.load64_zero
				;===----------------------------------------------------------------------------

				define <2 x i64> @load_zero_i64_no_offset(i64* %p) {
				; CHECK-LABEL: load_zero_i64_no_offset:
				; CHECK: .functype load_zero_i64_no_offset (i32) -> (v128)
				; CHECK-NEXT: # %bb.0:
				; CHECK-NEXT: local.get 0
				; CHECK-NEXT: v128.load64_zero 0
				; CHECK-NEXT: # fallthrough-return
				%v = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %p)
				ret <2 x i64> %v
				}

				define <2 x i64> @load_zero_i64_with_folded_offset(i64* %p) {
				; CHECK-LABEL: load_zero_i64_with_folded_offset:
				; CHECK: .functype load_zero_i64_with_folded_offset (i32) -> (v128)
				; CHECK-NEXT: # %bb.0:
				; CHECK-NEXT: local.get 0
				; CHECK-NEXT: v128.load64_zero 24
				; CHECK-NEXT: # fallthrough-return
				%q = ptrtoint i64* %p to i32
				%r = add nuw i32 %q, 24
				%s = inttoptr i32 %r to i64*
				%t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
				ret <2 x i64> %t
				}

				define <2 x i64> @load_zero_i64_with_folded_gep_offset(i64* %p) {
				; CHECK-LABEL: load_zero_i64_with_folded_gep_offset:
				; CHECK: .functype load_zero_i64_with_folded_gep_offset (i32) -> (v128)
				; CHECK-NEXT: # %bb.0:
				; CHECK-NEXT: local.get 0
				; CHECK-NEXT: v128.load64_zero 48
				; CHECK-NEXT: # fallthrough-return
				%s = getelementptr inbounds i64, i64* %p, i64 6
				%t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
				ret <2 x i64> %t
				}

				define <2 x i64> @load_zero_i64_with_unfolded_gep_negative_offset(i64* %p) {
				; CHECK-LABEL: load_zero_i64_with_unfolded_gep_negative_offset:
				; CHECK: .functype load_zero_i64_with_unfolded_gep_negative_offset (i32) -> (v128)
				; CHECK-NEXT: # %bb.0:
				; CHECK-NEXT: local.get 0
				; CHECK-NEXT: i32.const -48
				; CHECK-NEXT: i32.add
				; CHECK-NEXT: v128.load64_zero 0
				; CHECK-NEXT: # fallthrough-return
				%s = getelementptr inbounds i64, i64* %p, i64 -6
				%t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
				ret <2 x i64> %t
				}

				define <2 x i64> @load_zero_i64_with_unfolded_offset(i64* %p) {
				; CHECK-LABEL: load_zero_i64_with_unfolded_offset:
				; CHECK: .functype load_zero_i64_with_unfolded_offset (i32) -> (v128)
				; CHECK-NEXT: # %bb.0:
				; CHECK-NEXT: local.get 0
				; CHECK-NEXT: i32.const 24
				; CHECK-NEXT: i32.add
				; CHECK-NEXT: v128.load64_zero 0
				; CHECK-NEXT: # fallthrough-return
				%q = ptrtoint i64* %p to i32
				%r = add nsw i32 %q, 24
				%s = inttoptr i32 %r to i64*
				%t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
				ret <2 x i64> %t
				}

				define <2 x i64> @load_zero_i64_with_unfolded_gep_offset(i64* %p) {
				; CHECK-LABEL: load_zero_i64_with_unfolded_gep_offset:
				; CHECK: .functype load_zero_i64_with_unfolded_gep_offset (i32) -> (v128)
				; CHECK-NEXT: # %bb.0:
				; CHECK-NEXT: local.get 0
				; CHECK-NEXT: i32.const 48
				; CHECK-NEXT: i32.add
				; CHECK-NEXT: v128.load64_zero 0
				; CHECK-NEXT: # fallthrough-return
				%s = getelementptr i64, i64* %p, i64 6
				%t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
				ret <2 x i64> %t
				}

				define <2 x i64> @load_zero_i64_from_numeric_address() {
				; CHECK-LABEL: load_zero_i64_from_numeric_address:
				; CHECK: .functype load_zero_i64_from_numeric_address () -> (v128)
				; CHECK-NEXT: # %bb.0:
				; CHECK-NEXT: i32.const 0
				; CHECK-NEXT: v128.load64_zero 42
				; CHECK-NEXT: # fallthrough-return
				%s = inttoptr i32 42 to i64*
				%t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
				ret <2 x i64> %t
				}

				@gv_i64 = global i64 0
				define <2 x i64> @load_zero_i64_from_global_address() {
				; CHECK-LABEL: load_zero_i64_from_global_address:
				; CHECK: .functype load_zero_i64_from_global_address () -> (v128)
				; CHECK-NEXT: # %bb.0:
				; CHECK-NEXT: i32.const 0
				; CHECK-NEXT: v128.load64_zero gv_i64
				; CHECK-NEXT: # fallthrough-return
				%t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* @gv_i64)
				ret <2 x i64> %t
				}

llvm/test/MC/WebAssembly/simd-encodings.s

Show First 20 Lines • Show All 457 Lines • ▼ Show 20 Lines	main:
i32x4.shr_u		i32x4.shr_u

# CHECK: i32x4.add # encoding: [0xfd,0xae,0x01]		# CHECK: i32x4.add # encoding: [0xfd,0xae,0x01]
i32x4.add		i32x4.add

# CHECK: i32x4.sub # encoding: [0xfd,0xb1,0x01]		# CHECK: i32x4.sub # encoding: [0xfd,0xb1,0x01]
i32x4.sub		i32x4.sub

# CHECK: i32x4.dot_i16x8_s # encoding: [0xfd,0xb4,0x01]
i32x4.dot_i16x8_s

# CHECK: i32x4.mul # encoding: [0xfd,0xb5,0x01]		# CHECK: i32x4.mul # encoding: [0xfd,0xb5,0x01]
i32x4.mul		i32x4.mul

# CHECK: i32x4.min_s # encoding: [0xfd,0xb6,0x01]		# CHECK: i32x4.min_s # encoding: [0xfd,0xb6,0x01]
i32x4.min_s		i32x4.min_s

# CHECK: i32x4.min_u # encoding: [0xfd,0xb7,0x01]		# CHECK: i32x4.min_u # encoding: [0xfd,0xb7,0x01]
i32x4.min_u		i32x4.min_u

# CHECK: i32x4.max_s # encoding: [0xfd,0xb8,0x01]		# CHECK: i32x4.max_s # encoding: [0xfd,0xb8,0x01]
i32x4.max_s		i32x4.max_s

# CHECK: i32x4.max_u # encoding: [0xfd,0xb9,0x01]		# CHECK: i32x4.max_u # encoding: [0xfd,0xb9,0x01]
i32x4.max_u		i32x4.max_u

		# CHECK: i32x4.dot_i16x8_s # encoding: [0xfd,0xba,0x01]
		i32x4.dot_i16x8_s

# CHECK: i64x2.neg # encoding: [0xfd,0xc1,0x01]		# CHECK: i64x2.neg # encoding: [0xfd,0xc1,0x01]
i64x2.neg		i64x2.neg

# CHECK: i64x2.any_true # encoding: [0xfd,0xc2,0x01]		# CHECK: i64x2.any_true # encoding: [0xfd,0xc2,0x01]
i64x2.any_true		i64x2.any_true

# CHECK: i64x2.all_true # encoding: [0xfd,0xc3,0x01]		# CHECK: i64x2.all_true # encoding: [0xfd,0xc3,0x01]
i64x2.all_true		i64x2.all_true
▲ Show 20 Lines • Show All 113 Lines • ▼ Show 20 Lines	main:
i32x4.trunc_sat_f32x4_u		i32x4.trunc_sat_f32x4_u

# CHECK: f32x4.convert_i32x4_s # encoding: [0xfd,0xfa,0x01]		# CHECK: f32x4.convert_i32x4_s # encoding: [0xfd,0xfa,0x01]
f32x4.convert_i32x4_s		f32x4.convert_i32x4_s

# CHECK: f32x4.convert_i32x4_u # encoding: [0xfd,0xfb,0x01]		# CHECK: f32x4.convert_i32x4_u # encoding: [0xfd,0xfb,0x01]
f32x4.convert_i32x4_u		f32x4.convert_i32x4_u

# CHECK: f32x4.qfma # encoding: [0xfd,0xfc,0x01]		# CHECK: v128.load32_zero 32 # encoding: [0xfd,0xfc,0x01,0x02,0x20]
		v128.load32_zero 32

		# CHECK: v128.load64_zero 32 # encoding: [0xfd,0xfd,0x01,0x03,0x20]
		v128.load64_zero 32

		# CHECK: f32x4.qfma # encoding: [0xfd,0xb4,0x01]
f32x4.qfma		f32x4.qfma

# CHECK: f32x4.qfms # encoding: [0xfd,0xfd,0x01]		# CHECK: f32x4.qfms # encoding: [0xfd,0xd4,0x01]
f32x4.qfms		f32x4.qfms

# CHECK: f64x2.qfma # encoding: [0xfd,0xfe,0x01]		# CHECK: f64x2.qfma # encoding: [0xfd,0xfe,0x01]
f64x2.qfma		f64x2.qfma

# CHECK: f64x2.qfms # encoding: [0xfd,0xff,0x01]		# CHECK: f64x2.qfms # encoding: [0xfd,0xff,0x01]
f64x2.qfms		f64x2.qfms

end_function		end_function

This is an archive of the discontinued LLVM Phabricator instance.

[WebAssembly] Implement prototype v128.load{32,64}_zero instructions · Closed · Public

Details

Diff Detail

Event Timeline

Revision Contents

Diff 282719

clang/include/clang/Basic/BuiltinsWebAssembly.def

clang/lib/CodeGen/CGBuiltin.cpp

clang/test/CodeGen/builtins-wasm.c

llvm/include/llvm/IR/IntrinsicsWebAssembly.td

llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h

llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

llvm/lib/Target/WebAssembly/WebAssemblyInstrMemory.td

llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

llvm/test/CodeGen/WebAssembly/simd-load-zero-offset.ll

llvm/test/MC/WebAssembly/simd-encodings.s

[WebAssembly] Implement prototype v128.load{32,64}_zero instructions
ClosedPublic