diff --git a/clang/include/clang/Basic/BuiltinsWebAssembly.def b/clang/include/clang/Basic/BuiltinsWebAssembly.def
--- a/clang/include/clang/Basic/BuiltinsWebAssembly.def
+++ b/clang/include/clang/Basic/BuiltinsWebAssembly.def
@@ -192,7 +192,6 @@
 TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_s_f64x2_i32x4, "V4iV2d", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_trunc_sat_zero_u_f64x2_i32x4, "V4UiV2d", "nc", "simd128")
 TARGET_BUILTIN(__builtin_wasm_demote_zero_f64x2_f32x4, "V4fV2d", "nc", "simd128")
-TARGET_BUILTIN(__builtin_wasm_promote_low_f32x4_f64x2, "V2dV4f", "nc", "simd128")
 
 TARGET_BUILTIN(__builtin_wasm_load32_zero, "V4iiC*", "n", "simd128")
 TARGET_BUILTIN(__builtin_wasm_load64_zero, "V2LLiLLiC*", "n", "simd128")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17728,11 +17728,6 @@
     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_demote_zero);
     return Builder.CreateCall(Callee, Vec);
   }
-  case WebAssembly::BI__builtin_wasm_promote_low_f32x4_f64x2: {
-    Value *Vec = EmitScalarExpr(E->getArg(0));
-    Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_promote_low);
-    return Builder.CreateCall(Callee, Vec);
-  }
   case WebAssembly::BI__builtin_wasm_load32_zero: {
     Value *Ptr = EmitScalarExpr(E->getArg(0));
     Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_load32_zero);
diff --git a/clang/lib/Headers/wasm_simd128.h b/clang/lib/Headers/wasm_simd128.h
--- a/clang/lib/Headers/wasm_simd128.h
+++ b/clang/lib/Headers/wasm_simd128.h
@@ -43,6 +43,7 @@
 typedef int __i32x2 __attribute__((__vector_size__(8), __aligned__(8)));
 typedef unsigned int __u32x2 __attribute__((__vector_size__(8), __aligned__(8)));
+typedef float __f32x2 __attribute__((__vector_size__(8), __aligned__(8)));
 
 #define __DEFAULT_FN_ATTRS \
   __attribute__((__always_inline__, __nodebug__, __target__("simd128"), \
@@ -1155,7 +1156,8 @@
 static __inline__ v128_t __DEFAULT_FN_ATTRS
 wasm_f64x2_promote_low_f32x4(v128_t __a) {
-  return (v128_t)__builtin_wasm_promote_low_f32x4_f64x2((__f32x4)__a);
+  return (v128_t) __builtin_convertvector(
+      (__f32x2){((__f32x4)__a)[0], ((__f32x4)__a)[1]}, __f64x2);
 }
 
 #define wasm_i8x16_shuffle(__a, __b, __c0, __c1, __c2, __c3, __c4, __c5, __c6, \
diff --git a/clang/test/CodeGen/builtins-wasm.c b/clang/test/CodeGen/builtins-wasm.c
--- a/clang/test/CodeGen/builtins-wasm.c
+++ b/clang/test/CodeGen/builtins-wasm.c
@@ -898,12 +898,6 @@
   // WEBASSEMBLY: ret
 }
 
-f64x2 wasm_promote_low_f32x4_f64x2(f32x4 x) {
-  return __builtin_wasm_promote_low_f32x4_f64x2(x);
-  // WEBASSEMBLY: call <2 x double> @llvm.wasm.promote.low(<4 x float> %x)
-  // WEBASSEMBLY: ret
-}
-
 i32x4 load32_zero(const int *p) {
   return __builtin_wasm_load32_zero(p);
   // WEBASSEMBLY: call <4 x i32> @llvm.wasm.load32.zero(i32* %p)
diff --git a/clang/test/Headers/wasm.c b/clang/test/Headers/wasm.c
--- a/clang/test/Headers/wasm.c
+++ b/clang/test/Headers/wasm.c
@@ -1281,7 +1281,7 @@
 // CHECK-LABEL: @test_v128_any_true(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.anytrue.v16i8(<16 x i8> [[TMP0]]) #[[ATTR11:[0-9]+]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.anytrue.v16i8(<16 x i8> [[TMP0]]) #[[ATTR10:[0-9]+]]
 // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK-NEXT: ret i1 [[TOBOOL_I]]
 //
@@ -1291,7 +1291,7 @@
 // CHECK-LABEL: @test_v128_bitselect(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.wasm.bitselect.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[MASK:%.*]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.wasm.bitselect.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]], <4 x i32> [[MASK:%.*]]) #[[ATTR10]]
 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
 //
 v128_t test_v128_bitselect(v128_t a, v128_t b, v128_t mask) {
@@ -1301,7 +1301,7 @@
 // CHECK-LABEL: @test_i8x16_abs(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[TMP0]], i1 false) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.abs.v16i8(<16 x i8> [[TMP0]], i1 false) #[[ATTR10]]
 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
@@ -1323,7 +1323,7 @@
 // CHECK-LABEL: @test_i8x16_all_true(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v16i8(<16 x i8> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v16i8(<16 x i8> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK-NEXT: ret i1 [[TOBOOL_I]]
 //
@@ -1334,7 +1334,7 @@
 // CHECK-LABEL: @test_i8x16_bitmask(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v16i8(<16 x i8> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v16i8(<16 x i8> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: ret i32 [[TMP1]]
 //
 int32_t test_i8x16_bitmask(v128_t a) {
@@ -1344,7 +1344,7 @@
 // CHECK-LABEL: @test_i8x16_popcnt(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.wasm.popcnt(<16 x i8> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i8> @llvm.wasm.popcnt(<16 x i8> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
@@ -1410,7 +1410,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -1422,7 +1422,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -1446,7 +1446,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.sub.sat.signed.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.sub.sat.signed.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -1458,7 +1458,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.sub.sat.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.sub.sat.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -1522,7 +1522,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.avgr.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.avgr.unsigned.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -1533,7 +1533,7 @@
 // CHECK-LABEL: @test_i16x8_abs(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[TMP0]], i1 false) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.abs.v8i16(<8 x i16> [[TMP0]], i1 false) #[[ATTR10]]
 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
@@ -1555,7 +1555,7 @@
 // CHECK-LABEL: @test_i16x8_all_true(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v8i16(<8 x i16> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v8i16(<8 x i16> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK-NEXT: ret i1 [[TOBOOL_I]]
 //
@@ -1566,7 +1566,7 @@
 // CHECK-LABEL: @test_i16x8_bitmask(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v8i16(<8 x i16> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v8i16(<8 x i16> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: ret i32 [[TMP1]]
 //
 int32_t test_i16x8_bitmask(v128_t a) {
@@ -1631,7 +1631,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -1643,7 +1643,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -1667,7 +1667,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.sub.sat.signed.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.sub.sat.signed.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -1679,7 +1679,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.sub.sat.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.sub.sat.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -1755,7 +1755,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.avgr.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.avgr.unsigned.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -1765,7 +1765,7 @@
 // CHECK-LABEL: @test_i32x4_abs(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[A:%.*]], i1 false) #[[ATTR11]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[A:%.*]], i1 false) #[[ATTR10]]
 // CHECK-NEXT: ret <4 x i32> [[TMP0]]
 //
 v128_t test_i32x4_abs(v128_t a) {
@@ -1783,7 +1783,7 @@
 // CHECK-LABEL: @test_i32x4_all_true(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.alltrue.v4i32(<4 x i32> [[A:%.*]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.alltrue.v4i32(<4 x i32> [[A:%.*]]) #[[ATTR10]]
 // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP0]], 0
 // CHECK-NEXT: ret i1 [[TOBOOL_I]]
 //
@@ -1793,7 +1793,7 @@
 // CHECK-LABEL: @test_i32x4_bitmask(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.bitmask.v4i32(<4 x i32> [[A:%.*]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.wasm.bitmask.v4i32(<4 x i32> [[A:%.*]]) #[[ATTR10]]
 // CHECK-NEXT: ret i32 [[TMP0]]
 //
 int32_t test_i32x4_bitmask(v128_t a) {
@@ -1904,7 +1904,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.dot(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.dot(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
 v128_t test_i32x4_dot_i16x8(v128_t a, v128_t b) {
@@ -1914,7 +1914,7 @@
 // CHECK-LABEL: @test_i64x2_abs(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP0]], i1 false) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.abs.v2i64(<2 x i64> [[TMP0]], i1 false) #[[ATTR10]]
 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
@@ -1936,7 +1936,7 @@
 // CHECK-LABEL: @test_i64x2_all_true(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v2i64(<2 x i64> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.alltrue.v2i64(<2 x i64> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: [[TOBOOL_I:%.*]] = icmp ne i32 [[TMP1]], 0
 // CHECK-NEXT: ret i1 [[TOBOOL_I]]
 //
@@ -1947,7 +1947,7 @@
 // CHECK-LABEL: @test_i64x2_bitmask(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x i64>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v2i64(<2 x i64> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.wasm.bitmask.v2i64(<2 x i64> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: ret i32 [[TMP1]]
 //
 int32_t test_i64x2_bitmask(v128_t a) {
@@ -2035,7 +2035,7 @@
 // CHECK-LABEL: @test_f32x4_abs(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
@@ -2057,7 +2057,7 @@
 // CHECK-LABEL: @test_f32x4_sqrt(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
@@ -2068,7 +2068,7 @@
 // CHECK-LABEL: @test_f32x4_ceil(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
@@ -2079,7 +2079,7 @@
 // CHECK-LABEL: @test_f32x4_floor(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
@@ -2090,7 +2090,7 @@
 // CHECK-LABEL: @test_f32x4_trunc(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
@@ -2101,7 +2101,7 @@
 // CHECK-LABEL: @test_f32x4_nearest(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
@@ -2161,7 +2161,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.minimum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.minimum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -2173,7 +2173,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.maximum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.maximum.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -2185,7 +2185,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmin.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmin.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -2197,7 +2197,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmax.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.wasm.pmax.v4f32(<4 x float> [[TMP0]], <4 x float> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x float> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -2208,7 +2208,7 @@
 // CHECK-LABEL: @test_f64x2_abs(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
@@ -2230,7 +2230,7 @@
 // CHECK-LABEL: @test_f64x2_sqrt(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
@@ -2241,7 +2241,7 @@
 // CHECK-LABEL: @test_f64x2_ceil(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
@@ -2252,7 +2252,7 @@
 // CHECK-LABEL: @test_f64x2_floor(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
@@ -2263,7 +2263,7 @@
 // CHECK-LABEL: @test_f64x2_trunc(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
@@ -2274,7 +2274,7 @@
 // CHECK-LABEL: @test_f64x2_nearest(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
@@ -2334,7 +2334,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.minimum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.minimum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -2346,7 +2346,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.maximum.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -2358,7 +2358,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmin.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmin.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -2370,7 +2370,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmax.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.wasm.pmax.v2f64(<2 x double> [[TMP0]], <2 x double> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x double> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -2381,7 +2381,7 @@
 // CHECK-LABEL: @test_i32x4_trunc_sat_f32x4(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
 //
 v128_t test_i32x4_trunc_sat_f32x4(v128_t a) {
@@ -2391,7 +2391,7 @@
 // CHECK-LABEL: @test_u32x4_trunc_sat_f32x4(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
 //
 v128_t test_u32x4_trunc_sat_f32x4(v128_t a) {
@@ -2443,7 +2443,7 @@
 // CHECK-LABEL: @test_i32x4_trunc_sat_f64x2_zero(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptosi.sat.v2i32.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
@@ -2454,7 +2454,7 @@
 // CHECK-LABEL: @test_u32x4_trunc_sat_f64x2_zero(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i32> @llvm.fptoui.sat.v2i32.v2f64(<2 x double> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
@@ -2465,7 +2465,7 @@
 // CHECK-LABEL: @test_f32x4_demote_f64x2_zero(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <2 x double>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.wasm.demote.zero(<2 x double> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.wasm.demote.zero(<2 x double> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x float> [[TMP1]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
@@ -2476,9 +2476,10 @@
 // CHECK-LABEL: @test_f64x2_promote_low_f32x4(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <4 x float>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.wasm.promote.low(<4 x float> [[TMP0]]) #[[ATTR11]]
-// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x double> [[TMP1]] to <4 x i32>
-// CHECK-NEXT: ret <4 x i32> [[TMP2]]
+// CHECK-NEXT: [[VECINIT2_I:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> undef, <2 x i32> <i32 0, i32 1>
+// CHECK-NEXT: [[CONV_I:%.*]] = fpext <2 x float> [[VECINIT2_I]] to <2 x double>
+// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x double> [[CONV_I]] to <4 x i32>
+// CHECK-NEXT: ret <4 x i32> [[TMP1]]
 //
 v128_t test_f64x2_promote_low_f32x4(v128_t a) {
   return wasm_f64x2_promote_low_f32x4(a);
@@ -2536,7 +2537,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.swizzle(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -2548,7 +2549,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.signed.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.signed.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -2560,7 +2561,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.unsigned.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.wasm.narrow.unsigned.v16i8.v8i16(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -2570,7 +2571,7 @@
 // CHECK-LABEL: @test_i16x8_narrow_i32x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.signed.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.signed.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
 //
@@ -2580,7 +2581,7 @@
 // CHECK-LABEL: @test_u16x8_narrow_i32x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.unsigned.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.wasm.narrow.unsigned.v8i16.v4i32(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[TMP0]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
 //
@@ -2727,7 +2728,7 @@
 // CHECK-LABEL: @test_i16x8_extadd_pairwise_i8x16(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.signed.v8i16(<16 x i8> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.signed.v8i16(<16 x i8> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
@@ -2738,7 +2739,7 @@
 // CHECK-LABEL: @test_u16x8_extadd_pairwise_u8x16(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.unsigned.v8i16(<16 x i8> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.wasm.extadd.pairwise.unsigned.v8i16(<16 x i8> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i16> [[TMP1]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
@@ -2749,7 +2750,7 @@
 // CHECK-LABEL: @test_i32x4_extadd_pairwise_i16x8(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.signed.v4i32(<8 x i16> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.signed.v4i32(<8 x i16> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
 //
 v128_t test_i32x4_extadd_pairwise_i16x8(v128_t a) {
@@ -2759,7 +2760,7 @@
 // CHECK-LABEL: @test_u32x4_extadd_pairwise_u16x8(
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.unsigned.v4i32(<8 x i16> [[TMP0]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.wasm.extadd.pairwise.unsigned.v4i32(<8 x i16> [[TMP0]]) #[[ATTR10]]
 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
 //
 v128_t test_u32x4_extadd_pairwise_u16x8(v128_t a) {
@@ -2770,7 +2771,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.low.signed.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.low.signed.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -2782,7 +2783,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.high.signed.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.high.signed.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -2794,7 +2795,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.low.unsigned.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.low.unsigned.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -2806,7 +2807,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <16 x i8>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <16 x i8>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.high.unsigned.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.extmul.high.unsigned.v8i16(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
@@ -2818,7 +2819,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.low.signed.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.low.signed.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
 v128_t test_i32x4_extmul_low_i16x8(v128_t a, v128_t b) {
@@ -2829,7 +2830,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.high.signed.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.high.signed.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
 v128_t test_i32x4_extmul_high_i16x8(v128_t a, v128_t b) {
@@ -2840,7 +2841,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.low.unsigned.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.low.unsigned.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
 v128_t test_u32x4_extmul_low_u16x8(v128_t a, v128_t b) {
@@ -2851,7 +2852,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.high.unsigned.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.wasm.extmul.high.unsigned.v4i32(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: ret <4 x i32> [[TMP2]]
 //
 v128_t test_u32x4_extmul_high_u16x8(v128_t a, v128_t b) {
@@ -2860,7 +2861,7 @@
 // CHECK-LABEL: @test_i64x2_extmul_low_i32x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.low.signed.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.low.signed.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
 //
@@ -2870,7 +2871,7 @@
 // CHECK-LABEL: @test_i64x2_extmul_high_i32x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.high.signed.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.high.signed.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
 //
@@ -2880,7 +2881,7 @@
 // CHECK-LABEL: @test_u64x2_extmul_low_u32x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.low.unsigned.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.low.unsigned.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
 //
@@ -2890,7 +2891,7 @@
 // CHECK-LABEL: @test_u64x2_extmul_high_u32x4(
 // CHECK-NEXT: entry:
-// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.high.unsigned.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP0:%.*]] = tail call <2 x i64> @llvm.wasm.extmul.high.unsigned.v2i64(<4 x i32> [[A:%.*]], <4 x i32> [[B:%.*]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[TMP0]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP1]]
 //
@@ -2902,7 +2903,7 @@
 // CHECK-NEXT: entry:
 // CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A:%.*]] to <8 x i16>
 // CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B:%.*]] to <8 x i16>
-// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.q15mulr.sat.signed(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR11]]
+// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.wasm.q15mulr.sat.signed(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]]) #[[ATTR10]]
 // CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to <4 x i32>
 // CHECK-NEXT: ret <4 x i32> [[TMP3]]
 //
diff --git a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
--- a/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
+++ b/llvm/include/llvm/IR/IntrinsicsWebAssembly.td
@@ -263,13 +263,10 @@
             [LLVMSubdivide2VectorType<0>],
             [IntrNoMem, IntrSpeculatable]>;
 
-// TODO: Remove these if possible if they are merged to the spec.
+// TODO: Remove this if possible.
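+// (promote_low no longer needs an intrinsic: clang emits a plain
+// shufflevector + fpext, which the backend pattern-matches back to
+// f64x2.promote_low_f32x4 in WebAssemblyISelLowering.cpp.)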
 def int_wasm_demote_zero :
   Intrinsic<[llvm_v4f32_ty], [llvm_v2f64_ty],
             [IntrNoMem, IntrSpeculatable]>;
-def int_wasm_promote_low :
-  Intrinsic<[llvm_v2f64_ty], [llvm_v4f32_ty],
-            [IntrNoMem, IntrSpeculatable]>;
 
 //===----------------------------------------------------------------------===//
 // Thread-local storage intrinsics
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def
@@ -37,6 +37,7 @@
 HANDLE_NODETYPE(EXTEND_HIGH_U)
 HANDLE_NODETYPE(CONVERT_LOW_S)
 HANDLE_NODETYPE(CONVERT_LOW_U)
+HANDLE_NODETYPE(PROMOTE_LOW)
 HANDLE_NODETYPE(TRUNC_SAT_ZERO_S)
 HANDLE_NODETYPE(TRUNC_SAT_ZERO_U)
 HANDLE_NODETYPE(THROW)
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
--- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -149,9 +149,11 @@
   setTargetDAGCombine(ISD::SIGN_EXTEND);
   setTargetDAGCombine(ISD::ZERO_EXTEND);
 
-  // Combine int_to_fp of extract_vectors and vice versa into conversions ops
+  // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
+  // conversions ops
   setTargetDAGCombine(ISD::SINT_TO_FP);
   setTargetDAGCombine(ISD::UINT_TO_FP);
+  setTargetDAGCombine(ISD::FP_EXTEND);
   setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
 
   // Combine concat of {s,u}int_to_fp_sat to i32x4.trunc_sat_f64x2_zero_{s,u}
@@ -2186,60 +2188,109 @@
   if (ResVT != MVT::v2f64)
     return SDValue();
 
-  if (N->getOpcode() == ISD::SINT_TO_FP || N->getOpcode() == ISD::UINT_TO_FP) {
-    // Combine this:
-    //
-    //   (v2f64 ({s,u}int_to_fp
-    //     (v2i32 (extract_subvector (v4i32 $x), 0))))
-    //
-    // into (f64x2.convert_low_i32x4_{s,u} $x).
-    auto Extract = N->getOperand(0);
-    if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
-      return SDValue();
-    if (Extract.getValueType() != MVT::v2i32)
-      return SDValue();
-    auto Source = Extract.getOperand(0);
-    if (Source.getValueType() != MVT::v4i32)
-      return SDValue();
-    auto *IndexNode = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
-    if (IndexNode == nullptr || IndexNode->getZExtValue() != 0)
-      return SDValue();
-
-    unsigned Op = N->getOpcode() == ISD::SINT_TO_FP
-                      ? WebAssemblyISD::CONVERT_LOW_S
-                      : WebAssemblyISD::CONVERT_LOW_U;
-
-    return DAG.getNode(Op, SDLoc(N), ResVT, Source);
+  auto GetWasmConversionOp = [](unsigned Op) {
+    switch (Op) {
+    case ISD::SINT_TO_FP:
+      return WebAssemblyISD::CONVERT_LOW_S;
+    case ISD::UINT_TO_FP:
+      return WebAssemblyISD::CONVERT_LOW_U;
+    case ISD::FP_EXTEND:
+      return WebAssemblyISD::PROMOTE_LOW;
+    }
+    llvm_unreachable("unexpected op");
+  };
 
-  } else if (N->getOpcode() == ISD::EXTRACT_SUBVECTOR) {
+  if (N->getOpcode() == ISD::EXTRACT_SUBVECTOR) {
     // Combine this:
    //
     //   (v2f64 (extract_subvector
     //     (v4f64 ({s,u}int_to_fp (v4i32 $x))), 0))
     //
     // into (f64x2.convert_low_i32x4_{s,u} $x).
-    auto IntToFP = N->getOperand(0);
-    if (IntToFP.getOpcode() != ISD::SINT_TO_FP &&
-        IntToFP.getOpcode() != ISD::UINT_TO_FP)
+    //
+    // Or this:
+    //
+    //   (v2f64 (extract_subvector
+    //     (v4f64 (fp_extend (v4f32 $x))), 0))
+    //
+    // into (f64x2.promote_low_f32x4 $x).
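+    //
+    // In both forms the conversion must cover the low half of the input,
+    // i.e. the extract_subvector index must be 0; this is checked below.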
+ auto Conversion = N->getOperand(0); + auto ConversionOp = Conversion.getOpcode(); + MVT ExpectedSourceType; + switch (ConversionOp) { + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + ExpectedSourceType = MVT::v4i32; + break; + case ISD::FP_EXTEND: + ExpectedSourceType = MVT::v4f32; + break; + default: return SDValue(); - if (IntToFP.getValueType() != MVT::v4f64) + } + + if (Conversion.getValueType() != MVT::v4f64) return SDValue(); - auto Source = IntToFP.getOperand(0); - if (Source.getValueType() != MVT::v4i32) + + auto Source = Conversion.getOperand(0); + if (Source.getValueType() != ExpectedSourceType) return SDValue(); + auto IndexNode = dyn_cast(N->getOperand(1)); if (IndexNode == nullptr || IndexNode->getZExtValue() != 0) return SDValue(); - unsigned Op = IntToFP->getOpcode() == ISD::SINT_TO_FP - ? WebAssemblyISD::CONVERT_LOW_S - : WebAssemblyISD::CONVERT_LOW_U; - + auto Op = GetWasmConversionOp(ConversionOp); return DAG.getNode(Op, SDLoc(N), ResVT, Source); + } - } else { + // Combine this: + // + // (v2f64 ({s,u}int_to_fp + // (v2i32 (extract_subvector (v4i32 $x), 0)))) + // + // into (f64x2.convert_low_i32x4_{s,u} $x). + // + // Or this: + // + // (v2f64 (fp_extend + // (v2f32 (extract_subvector (v4f32 $x), 0)))) + // + // into (f64x2.promote_low_f32x4 $x). + auto ConversionOp = N->getOpcode(); + MVT ExpectedExtractType; + MVT ExpectedSourceType; + switch (ConversionOp) { + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + ExpectedExtractType = MVT::v2i32; + ExpectedSourceType = MVT::v4i32; + break; + case ISD::FP_EXTEND: + ExpectedExtractType = MVT::v2f32; + ExpectedSourceType = MVT::v4f32; + break; + default: llvm_unreachable("unexpected opcode"); } + + auto Extract = N->getOperand(0); + if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR) + return SDValue(); + + if (Extract.getValueType() != ExpectedExtractType) + return SDValue(); + + auto Source = Extract.getOperand(0); + if (Source.getValueType() != ExpectedSourceType) + return SDValue(); + + auto *IndexNode = dyn_cast(Extract.getOperand(1)); + if (IndexNode == nullptr || IndexNode->getZExtValue() != 0) + return SDValue(); + + unsigned Op = GetWasmConversionOp(ConversionOp); + return DAG.getNode(Op, SDLoc(N), ResVT, Source); } static SDValue @@ -2298,6 +2349,7 @@ return performVectorExtendCombine(N, DCI); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: + case ISD::FP_EXTEND: case ISD::EXTRACT_SUBVECTOR: return performVectorConvertLowCombine(N, DCI); case ISD::CONCAT_VECTORS: diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -1288,11 +1288,13 @@ defm "" : SIMDConvert; -// Prototype f64x2 conversions +// f64x2 <-> f32x4 conversions defm "" : SIMDConvert; -defm "" : SIMDConvert; + +def promote_t : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>; +def promote_low : SDNode<"WebAssemblyISD::PROMOTE_LOW", promote_t>; +defm "" : SIMDConvert; //===----------------------------------------------------------------------===// // Saturating Rounding Q-Format Multiplication diff --git a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll --- a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll @@ -126,3 +126,25 @@ %a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> ret <2 x double> %a } + +; CHECK-LABEL: promote_low_v2f64: +; NO-SIMD128-NOT: 
f64x2 +; SIMD128-NEXT: .functype promote_low_v2f64 (v128) -> (v128){{$}} +; SIMD128-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0 +; SIMD128-NEXT: return $pop[[R]] +define <2 x double> @promote_low_v2f64(<4 x float> %x) { + %v = shufflevector <4 x float> %x, <4 x float> undef, <2 x i32> + %a = fpext <2 x float> %v to <2 x double> + ret <2 x double> %a +} + +; CHECK-LABEL: promote_low_v2f64_2: +; NO-SIMD128-NOT: f64x2 +; SIMD128-NEXT: .functype promote_low_v2f64_2 (v128) -> (v128){{$}} +; SIMD128-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0 +; SIMD128-NEXT: return $pop[[R]] +define <2 x double> @promote_low_v2f64_2(<4 x float> %x) { + %v = fpext <4 x float> %x to <4 x double> + %a = shufflevector <4 x double> %v, <4 x double> undef, <2 x i32> + ret <2 x double> %a +} diff --git a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll --- a/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll @@ -806,13 +806,3 @@ %v = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %a) ret <2 x double> %v } - -; CHECK-LABEL: promote_low_v2f64: -; CHECK-NEXT: .functype promote_low_v2f64 (v128) -> (v128){{$}} -; CHECK-NEXT: f64x2.promote_low_f32x4 $push[[R:[0-9]+]]=, $0{{$}} -; CHECK-NEXT: return $pop[[R]]{{$}} -declare <2 x double> @llvm.wasm.promote.low(<4 x float>) -define <2 x double> @promote_low_v2f64(<4 x float> %a) { - %v = call <2 x double> @llvm.wasm.promote.low(<4 x float> %a) - ret <2 x double> %v -}