diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def
--- a/clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -192,8 +192,5 @@
 TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4, "V4iV2d", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4, "V4UiV2d", "nc", "simd128")
 
-TARGET_BUILTIN(__builtin_wasm_load32_zero, "V4iiC*", "n", "simd128")
-TARGET_BUILTIN(__builtin_wasm_load64_zero, "V2LLiLLiC*", "n", "simd128")
-
 #undef BUILTIN
 #undef TARGET_BUILTIN
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17967,16 +17967,6 @@
                                  Builder.getInt32(2), Builder.getInt32(3)});
     return Builder.CreateShuffleVector(Trunc, Splat, ConcatMask);
   }
-  case WebAssembly::BI__builtin_wasm_load32_zero: {
-    Value *Ptr = EmitScalarExpr(E->getArg(0));
-    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_load32_zero);
-    return Builder.CreateCall(Callee, {Ptr});
-  }
-  case WebAssembly::BI__builtin_wasm_load64_zero: {
-    Value *Ptr = EmitScalarExpr(E->getArg(0));
-    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_load64_zero);
-    return Builder.CreateCall(Callee, {Ptr});
-  }
   case WebAssembly::BI__builtin_wasm_shuffle_i8x16: {
     Value *Ops[18];
     size_t OpIdx = 0;
diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c
--- a/clang/test/CodeGen/builtins-wasm.c
+++ b/clang/test/CodeGen/builtins-wasm.c
@@ -836,18 +836,6 @@
   // WEBASSEMBLY: ret <4 x i32> %1
 }
 
-i32x4 load32_zero(const int *p) {
-  return __builtin_wasm_load32_zero(p);
-  // WEBASSEMBLY: call <4 x i32> @llvm.wasm.load32.zero(i32* %p)
-  // WEBASSEMBLY: ret
-}
-
-i64x2 load64_zero(const long long *p) {
-  return __builtin_wasm_load64_zero(p);
-  // WEBASSEMBLY: call <2 x i64> @llvm.wasm.load64.zero(i64* %p)
-  // WEBASSEMBLY: ret
-}
-
 i8x16 swizzle_i8x16(i8x16 x, i8x16 y) {
   return __builtin_wasm_swizzle_i8x16(x, y);
   // WEBASSEMBLY: call <16 x i8> @llvm.wasm.swizzle(<16 x i8> %x, <16 x i8> %y)
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -172,20 +172,6 @@
             [LLVMMatchType<0>, LLVMMatchType<0>],
            [IntrNoMem, IntrSpeculatable]>;
 
-// TODO: Replace these intrinsic with normal ISel patterns once the
-// load_zero instructions are merged to the proposal.
-def int_wasm_load32_zero :
-  Intrinsic<[llvm_v4i32_ty],
-            [LLVMPointerType<llvm_i32_ty>],
-            [IntrReadMem, IntrArgMemOnly],
-            "", [SDNPMemOperand]>;
-
-def int_wasm_load64_zero :
-  Intrinsic<[llvm_v2i64_ty],
-            [LLVMPointerType<llvm_i64_ty>],
-            [IntrReadMem, IntrArgMemOnly],
-            "", [SDNPMemOperand]>;
-
 // TODO: Replace this intrinsic with normal ISel patterns once popcnt is merged
 // to the proposal.
 def int_wasm_popcnt :
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -758,15 +758,6 @@
     Info.align = Align(8);
     Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
     return true;
-  case Intrinsic::wasm_load32_zero:
-  case Intrinsic::wasm_load64_zero:
-    Info.opc = ISD::INTRINSIC_W_CHAIN;
-    Info.memVT = Intrinsic == Intrinsic::wasm_load32_zero ? MVT::i32 : MVT::i64;
-    Info.ptrVal = I.getArgOperand(0);
-    Info.offset = 0;
-    Info.align = Align(1);
-    Info.flags = MachineMemOperand::MOLoad;
-    return true;
   default:
     return false;
   }
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
--- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -264,19 +264,19 @@
 } // mayLoad = 1, UseNamedOperandTable = 1
 }
 
-// TODO: Also support v4f32 and v2f64 once the instructions are merged
-// to the proposal
 defm "" : SIMDLoadZero<I32x4, 0x5c>;
 defm "" : SIMDLoadZero<I64x2, 0x5d>;
 
+// TODO: f32x4 and f64x2 as well
 foreach vec = [I32x4, I64x2] in {
-defvar loadpat = !cast<Intrinsic>("int_wasm_load"#vec.lane_bits#"_zero");
-defvar inst = "LOAD_ZERO_"#vec;
-defm : LoadPatNoOffset<vec.vt, loadpat, inst>;
-defm : LoadPatImmOff<vec.vt, loadpat, regPlusImm, inst>;
-defm : LoadPatImmOff<vec.vt, loadpat, or_is_add, inst>;
-defm : LoadPatOffsetOnly<vec.vt, loadpat, inst>;
-defm : LoadPatGlobalAddrOffOnly<vec.vt, loadpat, inst>;
+  defvar inst = "LOAD_ZERO_"#vec;
+  defvar pat = PatFrag<(ops node:$ptr),
+      (vector_insert (vec.splat (vec.lane_vt 0)), (vec.lane_vt (load $ptr)), 0)>;
+  defm : LoadPatNoOffset<vec.vt, pat, inst>;
+  defm : LoadPatImmOff<vec.vt, pat, regPlusImm, inst>;
+  defm : LoadPatImmOff<vec.vt, pat, or_is_add, inst>;
+  defm : LoadPatOffsetOnly<vec.vt, pat, inst>;
+  defm : LoadPatGlobalAddrOffOnly<vec.vt, pat, inst>;
 }
 
 // Load lane
diff --git a/llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll b/llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll
--- a/llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-load-store-alignment.ll
@@ -912,6 +912,56 @@
   ret void
 }
 
+define <4 x i32> @load_zero_i32_a1(i32* %p) {
+; CHECK-LABEL: load_zero_i32_a1:
+; CHECK: .functype load_zero_i32_a1 (i32) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.load32_zero 0:p2align=0
+; CHECK-NEXT: # fallthrough-return
+  %x = load i32, i32* %p, align 1
+  %v = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
+  ret <4 x i32> %v
+}
+
+define <4 x i32> @load_zero_i32_a2(i32* %p) {
+; CHECK-LABEL: load_zero_i32_a2:
+; CHECK: .functype load_zero_i32_a2 (i32) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.load32_zero 0:p2align=1
+; CHECK-NEXT: # fallthrough-return
+  %x = load i32, i32* %p, align 2
+  %v = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
+  ret <4 x i32> %v
+}
+
+; 4 is the default alignment for v128.load32_zero so no attribute is needed.
+define <4 x i32> @load_zero_i32_a4(i32* %p) {
+; CHECK-LABEL: load_zero_i32_a4:
+; CHECK: .functype load_zero_i32_a4 (i32) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.load32_zero 0
+; CHECK-NEXT: # fallthrough-return
+  %x = load i32, i32* %p, align 4
+  %v = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
+  ret <4 x i32> %v
+}
+
+; 8 is greater than the default alignment so it is ignored.
+define <4 x i32> @load_zero_i32_a8(i32* %p) { +; CHECK-LABEL: load_zero_i32_a8: +; CHECK: .functype load_zero_i32_a8 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load32_zero 0 +; CHECK-NEXT: # fallthrough-return + %x = load i32, i32* %p, align 8 + %v = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0 + ret <4 x i32> %v +} + ; ============================================================================== ; 2 x i64 ; ============================================================================== @@ -1213,6 +1263,68 @@ ret void } +define <2 x i64> @load_zero_i64_a1(i64* %p) { +; CHECK-LABEL: load_zero_i64_a1: +; CHECK: .functype load_zero_i64_a1 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load64_zero 0:p2align=0 +; CHECK-NEXT: # fallthrough-return + %x = load i64, i64* %p, align 1 + %v = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0 + ret <2 x i64> %v +} + +define <2 x i64> @load_zero_i64_a2(i64* %p) { +; CHECK-LABEL: load_zero_i64_a2: +; CHECK: .functype load_zero_i64_a2 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load64_zero 0:p2align=1 +; CHECK-NEXT: # fallthrough-return + %x = load i64, i64* %p, align 2 + %v = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0 + ret <2 x i64> %v +} + +define <2 x i64> @load_zero_i64_a4(i64* %p) { +; CHECK-LABEL: load_zero_i64_a4: +; CHECK: .functype load_zero_i64_a4 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load64_zero 0:p2align=2 +; CHECK-NEXT: # fallthrough-return + %x = load i64, i64* %p, align 4 + %v = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0 + ret <2 x i64> %v +} + +; 8 is the default alignment for v128.load64_zero so no attribute is needed. +define <2 x i64> @load_zero_i64_a8(i64* %p) { +; CHECK-LABEL: load_zero_i64_a8: +; CHECK: .functype load_zero_i64_a8 (i32) -> (v128) +; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load64_zero 0 +; CHECK-NEXT: # fallthrough-return + %x = load i64, i64* %p, align 8 + %v = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0 + ret <2 x i64> %v +} + +; 16 is greater than the default alignment so it is ignored. 
+define <2 x i64> @load_zero_i64_a16(i64* %p) {
+; CHECK-LABEL: load_zero_i64_a16:
+; CHECK: .functype load_zero_i64_a16 (i32) -> (v128)
+; CHECK-NEXT: # %bb.0:
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: v128.load64_zero 0
+; CHECK-NEXT: # fallthrough-return
+  %x = load i64, i64* %p, align 16
+  %v = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
+  ret <2 x i64> %v
+}
+
 ; ==============================================================================
 ; 4 x float
 ; ==============================================================================
diff --git a/llvm/test/CodeGen/WebAssembly/simd-load-zero-offset.ll b/llvm/test/CodeGen/WebAssembly/simd-load-zero-offset.ll
--- a/llvm/test/CodeGen/WebAssembly/simd-load-zero-offset.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-load-zero-offset.ll
@@ -5,9 +5,6 @@
 target triple = "wasm32-unknown-unknown"
 
-declare <4 x i32> @llvm.wasm.load32.zero(i32*)
-declare <2 x i64> @llvm.wasm.load64.zero(i64*)
-
 ;===----------------------------------------------------------------------------
 ; v128.load32_zero
 ;===----------------------------------------------------------------------------
@@ -17,9 +14,10 @@
 ; CHECK: .functype load_zero_i32_no_offset (i32) -> (v128)
 ; CHECK-NEXT: # %bb.0:
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: v128.load32_zero 0:p2align=0
+; CHECK-NEXT: v128.load32_zero 0
 ; CHECK-NEXT: # fallthrough-return
-  %v = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %p)
+  %x = load i32, i32* %p
+  %v = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
   ret <4 x i32> %v
 }
 
@@ -28,12 +26,13 @@
 ; CHECK: .functype load_zero_i32_with_folded_offset (i32) -> (v128)
 ; CHECK-NEXT: # %bb.0:
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: v128.load32_zero 24:p2align=0
+; CHECK-NEXT: v128.load32_zero 24
 ; CHECK-NEXT: # fallthrough-return
   %q = ptrtoint i32* %p to i32
   %r = add nuw i32 %q, 24
   %s = inttoptr i32 %r to i32*
-  %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
+  %x = load i32, i32* %s
+  %t = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
   ret <4 x i32> %t
 }
 
@@ -42,10 +41,11 @@
 ; CHECK: .functype load_zero_i32_with_folded_gep_offset (i32) -> (v128)
 ; CHECK-NEXT: # %bb.0:
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: v128.load32_zero 24:p2align=0
+; CHECK-NEXT: v128.load32_zero 24
 ; CHECK-NEXT: # fallthrough-return
   %s = getelementptr inbounds i32, i32* %p, i32 6
-  %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
+  %x = load i32, i32* %s
+  %t = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
   ret <4 x i32> %t
 }
 
@@ -56,10 +56,11 @@
 ; CHECK-NEXT: local.get 0
 ; CHECK-NEXT: i32.const -24
 ; CHECK-NEXT: i32.add
-; CHECK-NEXT: v128.load32_zero 0:p2align=0
+; CHECK-NEXT: v128.load32_zero 0
 ; CHECK-NEXT: # fallthrough-return
   %s = getelementptr inbounds i32, i32* %p, i32 -6
-  %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
+  %x = load i32, i32* %s
+  %t = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
   ret <4 x i32> %t
 }
 
@@ -70,12 +71,13 @@
 ; CHECK-NEXT: local.get 0
 ; CHECK-NEXT: i32.const 24
 ; CHECK-NEXT: i32.add
-; CHECK-NEXT: v128.load32_zero 0:p2align=0
+; CHECK-NEXT: v128.load32_zero 0
 ; CHECK-NEXT: # fallthrough-return
   %q = ptrtoint i32* %p to i32
   %r = add nsw i32 %q, 24
   %s = inttoptr i32 %r to i32*
-  %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
+  %x = load i32, i32* %s
+  %t = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
   ret <4 x i32> %t
 }
 
@@ -86,10 +88,11 @@
 ; CHECK-NEXT: local.get 0
 ; CHECK-NEXT: i32.const 24
 ; CHECK-NEXT: i32.add
-; CHECK-NEXT: v128.load32_zero 0:p2align=0
+; CHECK-NEXT: v128.load32_zero 0
 ; CHECK-NEXT: # fallthrough-return
   %s = getelementptr i32, i32* %p, i32 6
-  %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
+  %x = load i32, i32* %s
+  %t = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
   ret <4 x i32> %t
 }
 
@@ -98,10 +101,11 @@
 ; CHECK: .functype load_zero_i32_from_numeric_address () -> (v128)
 ; CHECK-NEXT: # %bb.0:
 ; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: v128.load32_zero 42:p2align=0
+; CHECK-NEXT: v128.load32_zero 42
 ; CHECK-NEXT: # fallthrough-return
   %s = inttoptr i32 42 to i32*
-  %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* %s)
+  %x = load i32, i32* %s
+  %t = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
   ret <4 x i32> %t
 }
 
@@ -111,9 +115,10 @@
 ; CHECK: .functype load_zero_i32_from_global_address () -> (v128)
 ; CHECK-NEXT: # %bb.0:
 ; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: v128.load32_zero gv_i32:p2align=0
+; CHECK-NEXT: v128.load32_zero gv_i32
 ; CHECK-NEXT: # fallthrough-return
-  %t = tail call <4 x i32> @llvm.wasm.load32.zero(i32* @gv_i32)
+  %x = load i32, i32* @gv_i32
+  %t = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
   ret <4 x i32> %t
 }
 
@@ -126,9 +131,10 @@
 ; CHECK: .functype load_zero_i64_no_offset (i32) -> (v128)
 ; CHECK-NEXT: # %bb.0:
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: v128.load64_zero 0:p2align=0
+; CHECK-NEXT: v128.load64_zero 0
 ; CHECK-NEXT: # fallthrough-return
-  %v = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %p)
+  %x = load i64, i64* %p
+  %v = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
   ret <2 x i64> %v
 }
 
@@ -137,12 +143,13 @@
 ; CHECK: .functype load_zero_i64_with_folded_offset (i32) -> (v128)
 ; CHECK-NEXT: # %bb.0:
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: v128.load64_zero 24:p2align=0
+; CHECK-NEXT: v128.load64_zero 24
 ; CHECK-NEXT: # fallthrough-return
   %q = ptrtoint i64* %p to i32
   %r = add nuw i32 %q, 24
   %s = inttoptr i32 %r to i64*
-  %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
+  %x = load i64, i64* %s
+  %t = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
   ret <2 x i64> %t
 }
 
@@ -151,10 +158,11 @@
 ; CHECK: .functype load_zero_i64_with_folded_gep_offset (i32) -> (v128)
 ; CHECK-NEXT: # %bb.0:
 ; CHECK-NEXT: local.get 0
-; CHECK-NEXT: v128.load64_zero 48:p2align=0
+; CHECK-NEXT: v128.load64_zero 48
 ; CHECK-NEXT: # fallthrough-return
   %s = getelementptr inbounds i64, i64* %p, i64 6
-  %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
+  %x = load i64, i64* %s
+  %t = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
   ret <2 x i64> %t
 }
 
@@ -165,10 +173,11 @@
 ; CHECK-NEXT: local.get 0
 ; CHECK-NEXT: i32.const -48
 ; CHECK-NEXT: i32.add
-; CHECK-NEXT: v128.load64_zero 0:p2align=0
+; CHECK-NEXT: v128.load64_zero 0
 ; CHECK-NEXT: # fallthrough-return
   %s = getelementptr inbounds i64, i64* %p, i64 -6
-  %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
+  %x = load i64, i64* %s
+  %t = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
   ret <2 x i64> %t
 }
 
@@ -179,12 +188,13 @@
 ; CHECK-NEXT: local.get 0
 ; CHECK-NEXT: i32.const 24
 ; CHECK-NEXT: i32.add
-; CHECK-NEXT: v128.load64_zero 0:p2align=0
+; CHECK-NEXT: v128.load64_zero 0
 ; CHECK-NEXT: # fallthrough-return
   %q = ptrtoint i64* %p to i32
   %r = add nsw i32 %q, 24
   %s = inttoptr i32 %r to i64*
-  %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
+  %x = load i64, i64* %s
+  %t = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
   ret <2 x i64> %t
 }
 
@@ -195,10 +205,11 @@
 ; CHECK-NEXT: local.get 0
 ; CHECK-NEXT: i32.const 48
 ; CHECK-NEXT: i32.add
-; CHECK-NEXT: v128.load64_zero 0:p2align=0
+; CHECK-NEXT: v128.load64_zero 0
 ; CHECK-NEXT: # fallthrough-return
   %s = getelementptr i64, i64* %p, i64 6
-  %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
+  %x = load i64, i64* %s
+  %t = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
   ret <2 x i64> %t
 }
 
@@ -207,10 +218,11 @@
 ; CHECK: .functype load_zero_i64_from_numeric_address () -> (v128)
 ; CHECK-NEXT: # %bb.0:
 ; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: v128.load64_zero 42:p2align=0
+; CHECK-NEXT: v128.load64_zero 42
 ; CHECK-NEXT: # fallthrough-return
   %s = inttoptr i32 42 to i64*
-  %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* %s)
+  %x = load i64, i64* %s
+  %t = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
   ret <2 x i64> %t
 }
 
@@ -220,8 +232,9 @@
 ; CHECK: .functype load_zero_i64_from_global_address () -> (v128)
 ; CHECK-NEXT: # %bb.0:
 ; CHECK-NEXT: i32.const 0
-; CHECK-NEXT: v128.load64_zero gv_i64:p2align=0
+; CHECK-NEXT: v128.load64_zero gv_i64
 ; CHECK-NEXT: # fallthrough-return
-  %t = tail call <2 x i64> @llvm.wasm.load64.zero(i64* @gv_i64)
+  %x = load i64, i64* @gv_i64
+  %t = insertelement <2 x i64> zeroinitializer, i64 %x, i32 0
   ret <2 x i64> %t
 }