diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def --- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def @@ -46,7 +46,6 @@ HANDLE_NODETYPE(MEMORY_FILL) // Memory intrinsics -HANDLE_MEM_NODETYPE(LOAD_SPLAT) HANDLE_MEM_NODETYPE(GLOBAL_GET) HANDLE_MEM_NODETYPE(GLOBAL_SET) HANDLE_MEM_NODETYPE(TABLE_GET) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -194,6 +194,11 @@ MVT::v2f64}) setOperationAction(ISD::VECTOR_SHUFFLE, T, Custom); + // Support splatting + for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, + MVT::v2f64}) + setOperationAction(ISD::SPLAT_VECTOR, T, Legal); + // Custom lowering since wasm shifts must have a scalar shift amount for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL}) for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64}) @@ -253,6 +258,7 @@ // But saturating fp_to_int converstions are for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}) setOperationAction(Op, MVT::v4i32, Custom); + } // As a special case, these operators use the type to mean the type to @@ -2160,18 +2166,8 @@ return IsConstant(Lane); }; } else { - // Use a splat, but possibly a load_splat - LoadSDNode *SplattedLoad; - if ((SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) && - SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) { - Result = DAG.getMemIntrinsicNode( - WebAssemblyISD::LOAD_SPLAT, DL, DAG.getVTList(VecT), - {SplattedLoad->getChain(), SplattedLoad->getBasePtr(), - SplattedLoad->getOffset()}, - SplattedLoad->getMemoryVT(), SplattedLoad->getMemOperand()); - } else { - Result = DAG.getSplatBuildVector(VecT, DL, SplatValue); - } + // Use a splat (which might be selected as a load splat) + Result = DAG.getSplatBuildVector(VecT, DL, SplatValue); IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) { return Lane == SplatValue; }; diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -54,15 +54,6 @@ foreach SIZE = [2, 4, 8, 16, 32] in def LaneIdx#SIZE : ImmLeaf<i32, "return 0 <= Imm && Imm < " # SIZE # ";">; -// Create vector with identical lanes: splat -def splat2 : PatFrag<(ops node:$x), (build_vector $x, $x)>; -def splat4 : PatFrag<(ops node:$x), (build_vector $x, $x, $x, $x)>; -def splat8 : PatFrag<(ops node:$x), (build_vector $x, $x, $x, $x, - $x, $x, $x, $x)>; -def splat16 : PatFrag<(ops node:$x), - (build_vector $x, $x, $x, $x, $x, $x, $x, $x, - $x, $x, $x, $x, $x, $x, $x, $x)>; - class Vec { ValueType vt; ValueType int_vt; @@ -70,6 +61,7 @@ WebAssemblyRegClass lane_rc; int lane_bits; ImmLeaf lane_idx; + SDPatternOperator lane_load; PatFrag splat; string prefix; Vec split; @@ -82,7 +74,8 @@ let lane_rc = I32; let lane_bits = 8; let lane_idx = LaneIdx16; - let splat = splat16; + let lane_load = extloadi8; + let splat = PatFrag<(ops node:$x), (v16i8 (splat_vector (i8 $x)))>; let prefix = "i8x16"; } @@ -93,7 +86,8 @@ let lane_rc = I32; let lane_bits = 16; let lane_idx = LaneIdx8; - let splat = splat8; + let lane_load = extloadi16; + let splat = PatFrag<(ops node:$x), (v8i16 (splat_vector (i16 $x)))>; let prefix = "i16x8"; let split = I8x16; } @@ -105,7 +99,8 @@ let lane_rc = I32; let lane_bits = 32; let
lane_idx = LaneIdx4; - let splat = splat4; + let lane_load = load; + let splat = PatFrag<(ops node:$x), (v4i32 (splat_vector (i32 $x)))>; let prefix = "i32x4"; let split = I16x8; } @@ -117,7 +112,8 @@ let lane_rc = I64; let lane_bits = 64; let lane_idx = LaneIdx2; - let splat = splat2; + let lane_load = load; + let splat = PatFrag<(ops node:$x), (v2i64 (splat_vector (i64 $x)))>; let prefix = "i64x2"; let split = I32x4; } @@ -129,7 +125,8 @@ let lane_rc = F32; let lane_bits = 32; let lane_idx = LaneIdx4; - let splat = splat4; + let lane_load = load; + let splat = PatFrag<(ops node:$x), (v4f32 (splat_vector (f32 $x)))>; let prefix = "f32x4"; } @@ -140,7 +137,8 @@ let lane_rc = F64; let lane_bits = 64; let lane_idx = LaneIdx2; - let splat = splat2; + let lane_load = load; + let splat = PatFrag<(ops node:$x), (v2f64 (splat_vector (f64 $x)))>; let prefix = "f64x2"; } @@ -195,14 +193,11 @@ defm "" : SIMDLoadSplat<32, 9>; defm "" : SIMDLoadSplat<64, 10>; -def wasm_load_splat_t : SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>; -def wasm_load_splat : SDNode<"WebAssemblyISD::LOAD_SPLAT", wasm_load_splat_t, - [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; -def load_splat : PatFrag<(ops node:$addr), (wasm_load_splat node:$addr)>; - foreach vec = AllVecs in { -defvar inst = "LOAD"#vec.lane_bits#"_SPLAT"; -defm : LoadPat<vec.vt, load_splat, inst>; + defvar inst = "LOAD"#vec.lane_bits#"_SPLAT"; + defm : LoadPat<vec.vt, + PatFrag<(ops node:$addr), (splat_vector (vec.lane_vt (vec.lane_load node:$addr)))>, + inst>; } // Load and extend diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll --- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll @@ -17,14 +17,16 @@ ; CHECK-NEXT: i64.trunc_sat_f64_s ; CHECK-NEXT: i64x2.replace_lane 1 ; CHECK-NEXT: local.tee 0 -; CHECK-NEXT: v128.const 2147483647, 2147483647 +; CHECK-NEXT: i64.const 2147483647 +; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: local.tee 1 ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64x2.lt_s ; CHECK-NEXT: v128.bitselect ; CHECK-NEXT: local.tee 0 -; CHECK-NEXT: v128.const -2147483648, -2147483648 +; CHECK-NEXT: i64.const -2147483648 +; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: local.tee 1 ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 @@ -58,7 +60,8 @@ ; CHECK-NEXT: i64.trunc_sat_f64_u ; CHECK-NEXT: local.tee 2 ; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: v128.const 4294967295, 4294967295 +; CHECK-NEXT: i64.const 4294967295 +; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: i64.const -1 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 1 @@ -99,14 +102,16 @@ ; CHECK-NEXT: i64.trunc_sat_f64_s ; CHECK-NEXT: i64x2.replace_lane 1 ; CHECK-NEXT: local.tee 0 -; CHECK-NEXT: v128.const 4294967295, 4294967295 +; CHECK-NEXT: i64.const 4294967295 +; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: local.tee 1 ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64x2.lt_s ; CHECK-NEXT: v128.bitselect ; CHECK-NEXT: local.tee 0 -; CHECK-NEXT: v128.const 0, 0 +; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: local.tee 1 ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 @@ -297,9 +302,11 @@ ; CHECK-NEXT: f64x2.extract_lane 1 ; CHECK-NEXT: i32.trunc_sat_f64_s ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: v128.const 32767, 32767, 0, 0 +; CHECK-NEXT: i32.const 32767 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: v128.const -32768, -32768, 0, 0 +; CHECK-NEXT: i32.const -32768 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: i32x4.max_s ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1 @@ -326,7 +333,8 @@ ; CHECK-NEXT: f64x2.extract_lane 1 ; CHECK-NEXT: i32.trunc_sat_f64_u ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: v128.const 65535, 65535, 0, 0 +; CHECK-NEXT: i32.const 65535 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: i32x4.min_u ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 @@ -351,9 +359,11 @@ ; CHECK-NEXT: f64x2.extract_lane 1 ; CHECK-NEXT: i32.trunc_sat_f64_s ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: v128.const 65535, 65535, 0, 0 +; CHECK-NEXT: i32.const 65535 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: v128.const 0, 0, 0, 0 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: i32x4.max_s ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 @@ -374,9 +384,11 @@ ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32x4.trunc_sat_f32x4_s -; CHECK-NEXT: v128.const 32767, 32767, 32767, 32767 +; CHECK-NEXT: i32.const 32767 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768 +; CHECK-NEXT: i32.const -32768 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: i32x4.max_s ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1 @@ -397,7 +409,8 @@ ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32x4.trunc_sat_f32x4_u -; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 +; CHECK-NEXT: i32.const 65535 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: i32x4.min_u ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1 @@ -416,9 +429,11 @@ ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32x4.trunc_sat_f32x4_s -; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 +; CHECK-NEXT: i32.const 65535 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: v128.const 0, 0, 0, 0 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: i32x4.max_s ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1 @@ -476,13 +491,16 @@ ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: v128.const 32767, 32767, 32767, 32767 +; CHECK-NEXT: i32.const 32767 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.tee 8 ; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768 +; CHECK-NEXT: i32.const -32768 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.tee 9 ; CHECK-NEXT: i32x4.max_s -; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 +; CHECK-NEXT: i32.const 65535 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.tee 10 ; CHECK-NEXT: v128.and ; CHECK-NEXT: local.get 4 @@ -558,7 +576,8 @@ ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 +; CHECK-NEXT: i32.const 65535 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.tee 8 ; CHECK-NEXT: i32x4.min_u ; CHECK-NEXT: local.get 4 @@ -628,10 +647,12 @@ ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 +; CHECK-NEXT: i32.const 65535 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.tee 8 ; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: v128.const 0, 0, 0, 0 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.tee 9 ; CHECK-NEXT: i32x4.max_s ; 
CHECK-NEXT: local.get 4 @@ -1525,14 +1546,16 @@ ; CHECK-NEXT: i64.trunc_sat_f64_s ; CHECK-NEXT: i64x2.replace_lane 1 ; CHECK-NEXT: local.tee 0 -; CHECK-NEXT: v128.const 2147483647, 2147483647 +; CHECK-NEXT: i64.const 2147483647 +; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: local.tee 1 ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64x2.lt_s ; CHECK-NEXT: v128.bitselect ; CHECK-NEXT: local.tee 0 -; CHECK-NEXT: v128.const -2147483648, -2147483648 +; CHECK-NEXT: i64.const -2147483648 +; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: local.tee 1 ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 @@ -1564,7 +1587,8 @@ ; CHECK-NEXT: i64.trunc_sat_f64_u ; CHECK-NEXT: local.tee 2 ; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: v128.const 4294967295, 4294967295 +; CHECK-NEXT: i64.const 4294967295 +; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: i64.const -1 ; CHECK-NEXT: i64.const 0 ; CHECK-NEXT: local.get 1 @@ -1604,14 +1628,16 @@ ; CHECK-NEXT: i64.trunc_sat_f64_s ; CHECK-NEXT: i64x2.replace_lane 1 ; CHECK-NEXT: local.tee 0 -; CHECK-NEXT: v128.const 4294967295, 4294967295 +; CHECK-NEXT: i64.const 4294967295 +; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: local.tee 1 ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i64x2.lt_s ; CHECK-NEXT: v128.bitselect ; CHECK-NEXT: local.tee 0 -; CHECK-NEXT: v128.const 0, 0 +; CHECK-NEXT: i64.const 0 +; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: local.tee 1 ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 @@ -1790,9 +1816,11 @@ ; CHECK-NEXT: f64x2.extract_lane 1 ; CHECK-NEXT: i32.trunc_sat_f64_s ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: v128.const 32767, 32767, 0, 0 +; CHECK-NEXT: i32.const 32767 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: v128.const -32768, -32768, 0, 0 +; CHECK-NEXT: i32.const -32768 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: i32x4.max_s ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 @@ -1817,7 +1845,8 @@ ; CHECK-NEXT: f64x2.extract_lane 1 ; CHECK-NEXT: i32.trunc_sat_f64_u ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: v128.const 65535, 65535, 0, 0 +; CHECK-NEXT: i32.const 65535 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: i32x4.min_u ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 @@ -1841,9 +1870,11 @@ ; CHECK-NEXT: f64x2.extract_lane 1 ; CHECK-NEXT: i32.trunc_sat_f64_s ; CHECK-NEXT: i32x4.replace_lane 1 -; CHECK-NEXT: v128.const 65535, 65535, 0, 0 +; CHECK-NEXT: i32.const 65535 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: v128.const 0, 0, 0, 0 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: i32x4.max_s ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 @@ -1862,9 +1893,11 @@ ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32x4.trunc_sat_f32x4_s -; CHECK-NEXT: v128.const 32767, 32767, 32767, 32767 +; CHECK-NEXT: i32.const 32767 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768 +; CHECK-NEXT: i32.const -32768 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: i32x4.max_s ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1 @@ -1883,7 +1916,8 @@ ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32x4.trunc_sat_f32x4_u -; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 +; CHECK-NEXT: i32.const 65535 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: i32x4.min_u ; CHECK-NEXT: 
local.get 0 ; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1 @@ -1901,9 +1935,11 @@ ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32x4.trunc_sat_f32x4_s -; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 +; CHECK-NEXT: i32.const 65535 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: v128.const 0, 0, 0, 0 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: i32x4.max_s ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i8x16.shuffle 0, 1, 4, 5, 8, 9, 12, 13, 0, 1, 0, 1, 0, 1, 0, 1 @@ -1959,13 +1995,16 @@ ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: v128.const 32767, 32767, 32767, 32767 +; CHECK-NEXT: i32.const 32767 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.tee 8 ; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768 +; CHECK-NEXT: i32.const -32768 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.tee 9 ; CHECK-NEXT: i32x4.max_s -; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 +; CHECK-NEXT: i32.const 65535 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.tee 10 ; CHECK-NEXT: v128.and ; CHECK-NEXT: local.get 4 @@ -2039,7 +2078,8 @@ ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_u ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 +; CHECK-NEXT: i32.const 65535 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.tee 8 ; CHECK-NEXT: i32x4.min_u ; CHECK-NEXT: local.get 4 @@ -2108,10 +2148,12 @@ ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: i32x4.replace_lane 3 -; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 +; CHECK-NEXT: i32.const 65535 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.tee 8 ; CHECK-NEXT: i32x4.min_s -; CHECK-NEXT: v128.const 0, 0, 0, 0 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.tee 9 ; CHECK-NEXT: i32x4.max_s ; CHECK-NEXT: local.get 4 diff --git a/llvm/test/CodeGen/WebAssembly/masked-shifts.ll b/llvm/test/CodeGen/WebAssembly/masked-shifts.ll --- a/llvm/test/CodeGen/WebAssembly/masked-shifts.ll +++ b/llvm/test/CodeGen/WebAssembly/masked-shifts.ll @@ -107,7 +107,8 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i8x16.splat -; CHECK-NEXT: v128.const 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +; CHECK-NEXT: i32.const 7 +; CHECK-NEXT: i8x16.splat ; CHECK-NEXT: v128.and ; CHECK-NEXT: i8x16.extract_lane_u 0 ; CHECK-NEXT: i8x16.shl @@ -146,7 +147,8 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i8x16.splat -; CHECK-NEXT: v128.const 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +; CHECK-NEXT: i32.const 7 +; CHECK-NEXT: i8x16.splat ; CHECK-NEXT: v128.and ; CHECK-NEXT: i8x16.extract_lane_u 0 ; CHECK-NEXT: i8x16.shr_s @@ -185,7 +187,8 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i8x16.splat -; CHECK-NEXT: v128.const 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 +; CHECK-NEXT: i32.const 7 +; CHECK-NEXT: i8x16.splat ; CHECK-NEXT: v128.and ; CHECK-NEXT: i8x16.extract_lane_u 0 ; CHECK-NEXT: i8x16.shr_u @@ -223,7 +226,8 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i16x8.splat -; CHECK-NEXT: v128.const 15, 15, 15, 15, 15, 15, 15, 15 +; CHECK-NEXT: i32.const 15 +; CHECK-NEXT: i16x8.splat ; CHECK-NEXT: v128.and ; CHECK-NEXT: i16x8.extract_lane_u 0 ; CHECK-NEXT: i16x8.shl @@ -260,7 +264,8 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i16x8.splat -; CHECK-NEXT: v128.const 15, 15, 
15, 15, 15, 15, 15, 15 +; CHECK-NEXT: i32.const 15 +; CHECK-NEXT: i16x8.splat ; CHECK-NEXT: v128.and ; CHECK-NEXT: i16x8.extract_lane_u 0 ; CHECK-NEXT: i16x8.shr_s @@ -297,7 +302,8 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: i16x8.splat -; CHECK-NEXT: v128.const 15, 15, 15, 15, 15, 15, 15, 15 +; CHECK-NEXT: i32.const 15 +; CHECK-NEXT: i16x8.splat ; CHECK-NEXT: v128.and ; CHECK-NEXT: i16x8.extract_lane_u 0 ; CHECK-NEXT: i16x8.shr_u diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll --- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll @@ -1604,7 +1604,8 @@ ; CHECK-LABEL: min_unordered_v4f32: ; NO-SIMD128-NOT: f32x4 ; SIMD128-NEXT: .functype min_unordered_v4f32 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2{{$}} +; SIMD128-NEXT: f32.const $push[[C:[0-9]+]]=, 0x1.4p2{{$}} +; SIMD128-NEXT: f32x4.splat $push[[L0:[0-9]+]]=, $pop[[C]]{{$}} ; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @min_unordered_v4f32(<4 x float> %x) { @@ -1617,7 +1618,8 @@ ; CHECK-LABEL: max_unordered_v4f32: ; NO-SIMD128-NOT: f32x4 ; SIMD128-NEXT: .functype max_unordered_v4f32 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2 +; SIMD128-NEXT: f32.const $push[[C:[0-9]+]]=, 0x1.4p2{{$}} +; SIMD128-NEXT: f32x4.splat $push[[L0:[0-9]+]]=, $pop[[C]]{{$}} ; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @max_unordered_v4f32(<4 x float> %x) { @@ -1630,7 +1632,8 @@ ; CHECK-LABEL: min_ordered_v4f32: ; NO-SIMD128-NOT: f32x4 ; SIMD128-NEXT: .functype min_ordered_v4f32 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2{{$}} +; SIMD128-NEXT: f32.const $push[[C:[0-9]+]]=, 0x1.4p2{{$}} +; SIMD128-NEXT: f32x4.splat $push[[L0:[0-9]+]]=, $pop[[C]]{{$}} ; SIMD128-NEXT: f32x4.min $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @min_ordered_v4f32(<4 x float> %x) { @@ -1643,7 +1646,8 @@ ; CHECK-LABEL: max_ordered_v4f32: ; NO-SIMD128-NOT: f32x4 ; SIMD128-NEXT: .functype max_ordered_v4f32 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2{{$}} +; SIMD128-NEXT: f32.const $push[[C:[0-9]+]]=, 0x1.4p2{{$}} +; SIMD128-NEXT: f32x4.splat $push[[L0:[0-9]+]]=, $pop[[C]]{{$}} ; SIMD128-NEXT: f32x4.max $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @max_ordered_v4f32(<4 x float> %x) { @@ -1700,7 +1704,8 @@ ; CHECK-LABEL: min_const_intrinsic_v4f32: ; NO-SIMD128-NOT: f32x4 ; SIMD128-NEXT: .functype min_const_intrinsic_v4f32 () -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, 0x1.4p2, 0x1.4p2, 0x1.4p2, 0x1.4p2{{$}} +; SIMD128-NEXT: f32.const $push[[C:[0-9]+]]=, 0x1.4p2{{$}} +; SIMD128-NEXT: f32x4.splat $push[[R:[0-9]+]]=, $pop[[C]]{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @min_const_intrinsic_v4f32() { %a = call <4 x float> @llvm.minimum.v4f32( @@ -1713,7 +1718,8 @@ ; CHECK-LABEL: max_const_intrinsic_v4f32: ; NO-SIMD128-NOT: f32x4 ; SIMD128-NEXT: .functype max_const_intrinsic_v4f32 () -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, 0x1.5p5, 0x1.5p5, 0x1.5p5, 0x1.5p5{{$}} +; SIMD128-NEXT: f32.const $push[[C:[0-9]+]]=, 
0x1.5p5{{$}} +; SIMD128-NEXT: f32x4.splat $push[[R:[0-9]+]]=, $pop[[C]]{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <4 x float> @max_const_intrinsic_v4f32() { %a = call <4 x float> @llvm.maximum.v4f32( @@ -1850,7 +1856,8 @@ ; CHECK-LABEL: min_unordered_v2f64: ; NO-SIMD128-NOT: f64x2 ; SIMD128-NEXT: .functype min_unordered_v2f64 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}} +; SIMD128-NEXT: f64.const $push[[C:[0-9]+]]=, 0x1.4p2{{$}} +; SIMD128-NEXT: f64x2.splat $push[[L0:[0-9]+]]=, $pop[[C]]{{$}} ; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @min_unordered_v2f64(<2 x double> %x) { @@ -1863,7 +1870,8 @@ ; CHECK-LABEL: max_unordered_v2f64: ; NO-SIMD128-NOT: f64x2 ; SIMD128-NEXT: .functype max_unordered_v2f64 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}} +; SIMD128-NEXT: f64.const $push[[C:[0-9]+]]=, 0x1.4p2{{$}} +; SIMD128-NEXT: f64x2.splat $push[[L0:[0-9]+]]=, $pop[[C]]{{$}} ; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @max_unordered_v2f64(<2 x double> %x) { @@ -1876,7 +1884,8 @@ ; CHECK-LABEL: min_ordered_v2f64: ; NO-SIMD128-NOT: f64x2 ; SIMD128-NEXT: .functype min_ordered_v2f64 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}} +; SIMD128-NEXT: f64.const $push[[C:[0-9]+]]=, 0x1.4p2{{$}} +; SIMD128-NEXT: f64x2.splat $push[[L0:[0-9]+]]=, $pop[[C]]{{$}} ; SIMD128-NEXT: f64x2.min $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @min_ordered_v2f64(<2 x double> %x) { @@ -1889,7 +1898,8 @@ ; CHECK-LABEL: max_ordered_v2f64: ; NO-SIMD128-NOT: f64x2 ; SIMD128-NEXT: .functype max_ordered_v2f64 (v128) -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}} +; SIMD128-NEXT: f64.const $push[[C:[0-9]+]]=, 0x1.4p2{{$}} +; SIMD128-NEXT: f64x2.splat $push[[L0:[0-9]+]]=, $pop[[C]]{{$}} ; SIMD128-NEXT: f64x2.max $push[[R:[0-9]+]]=, $0, $pop[[L0]]{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @max_ordered_v2f64(<2 x double> %x) { @@ -1924,7 +1934,8 @@ ; CHECK-LABEL: min_const_intrinsic_v2f64: ; NO-SIMD128-NOT: f64x2 ; SIMD128-NEXT: .functype min_const_intrinsic_v2f64 () -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, 0x1.4p2, 0x1.4p2{{$}} +; SIMD128-NEXT: f64.const $push[[C:[0-9]+]]=, 0x1.4p2{{$}} +; SIMD128-NEXT: f64x2.splat $push[[R:[0-9]+]]=, $pop[[C]]{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @min_const_intrinsic_v2f64() { %a = call <2 x double> @llvm.minimum.v2f64( @@ -1937,7 +1948,8 @@ ; CHECK-LABEL: max_const_intrinsic_v2f64: ; NO-SIMD128-NOT: f64x2 ; SIMD128-NEXT: .functype max_const_intrinsic_v2f64 () -> (v128){{$}} -; SIMD128-NEXT: v128.const $push[[R:[0-9]+]]=, 0x1.5p5, 0x1.5p5{{$}} +; SIMD128-NEXT: f64.const $push[[C:[0-9]+]]=, 0x1.5p5{{$}} +; SIMD128-NEXT: f64x2.splat $push[[R:[0-9]+]]=, $pop[[C]]{{$}} ; SIMD128-NEXT: return $pop[[R]]{{$}} define <2 x double> @max_const_intrinsic_v2f64() { %a = call <2 x double> @llvm.maximum.v2f64( diff --git a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll --- a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll @@ -118,107 +118,92 @@ ; CHECK-LABEL: swizzle_all_i8x16: ; CHECK: .functype swizzle_all_i8x16 (v128, v128) -> (v128) 
; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: global.get $push80=, __stack_pointer -; CHECK-NEXT: i32.const $push81=, 16 -; CHECK-NEXT: i32.sub $push98=, $pop80, $pop81 -; CHECK-NEXT: local.tee $push97=, $2=, $pop98 -; CHECK-NEXT: v128.store 0($pop97), $0 -; CHECK-NEXT: i8x16.extract_lane_u $push61=, $1, 0 +; CHECK-NEXT: global.get $push65=, __stack_pointer +; CHECK-NEXT: i32.const $push66=, 16 +; CHECK-NEXT: i32.sub $push83=, $pop65, $pop66 +; CHECK-NEXT: local.tee $push82=, $2=, $pop83 +; CHECK-NEXT: v128.store 0($pop82), $0 +; CHECK-NEXT: i8x16.extract_lane_u $push61=, $1, 15 ; CHECK-NEXT: i32.const $push1=, 15 ; CHECK-NEXT: i32.and $push62=, $pop61, $pop1 ; CHECK-NEXT: i32.or $push63=, $2, $pop62 -; CHECK-NEXT: v128.load8_splat $push64=, 0($pop63) -; CHECK-NEXT: i8x16.extract_lane_u $push57=, $1, 1 -; CHECK-NEXT: i32.const $push96=, 15 -; CHECK-NEXT: i32.and $push58=, $pop57, $pop96 +; CHECK-NEXT: i8x16.extract_lane_u $push57=, $1, 14 +; CHECK-NEXT: i32.const $push81=, 15 +; CHECK-NEXT: i32.and $push58=, $pop57, $pop81 ; CHECK-NEXT: i32.or $push59=, $2, $pop58 -; CHECK-NEXT: i32.load8_u $push60=, 0($pop59) -; CHECK-NEXT: i8x16.replace_lane $push65=, $pop64, 1, $pop60 -; CHECK-NEXT: i8x16.extract_lane_u $push53=, $1, 2 -; CHECK-NEXT: i32.const $push95=, 15 -; CHECK-NEXT: i32.and $push54=, $pop53, $pop95 +; CHECK-NEXT: i8x16.extract_lane_u $push53=, $1, 13 +; CHECK-NEXT: i32.const $push80=, 15 +; CHECK-NEXT: i32.and $push54=, $pop53, $pop80 ; CHECK-NEXT: i32.or $push55=, $2, $pop54 -; CHECK-NEXT: i32.load8_u $push56=, 0($pop55) -; CHECK-NEXT: i8x16.replace_lane $push66=, $pop65, 2, $pop56 -; CHECK-NEXT: i8x16.extract_lane_u $push49=, $1, 3 -; CHECK-NEXT: i32.const $push94=, 15 -; CHECK-NEXT: i32.and $push50=, $pop49, $pop94 +; CHECK-NEXT: i8x16.extract_lane_u $push49=, $1, 12 +; CHECK-NEXT: i32.const $push79=, 15 +; CHECK-NEXT: i32.and $push50=, $pop49, $pop79 ; CHECK-NEXT: i32.or $push51=, $2, $pop50 -; CHECK-NEXT: i32.load8_u $push52=, 0($pop51) -; CHECK-NEXT: i8x16.replace_lane $push67=, $pop66, 3, $pop52 -; CHECK-NEXT: i8x16.extract_lane_u $push45=, $1, 4 -; CHECK-NEXT: i32.const $push93=, 15 -; CHECK-NEXT: i32.and $push46=, $pop45, $pop93 +; CHECK-NEXT: i8x16.extract_lane_u $push45=, $1, 11 +; CHECK-NEXT: i32.const $push78=, 15 +; CHECK-NEXT: i32.and $push46=, $pop45, $pop78 ; CHECK-NEXT: i32.or $push47=, $2, $pop46 -; CHECK-NEXT: i32.load8_u $push48=, 0($pop47) -; CHECK-NEXT: i8x16.replace_lane $push68=, $pop67, 4, $pop48 -; CHECK-NEXT: i8x16.extract_lane_u $push41=, $1, 5 -; CHECK-NEXT: i32.const $push92=, 15 -; CHECK-NEXT: i32.and $push42=, $pop41, $pop92 +; CHECK-NEXT: i8x16.extract_lane_u $push41=, $1, 10 +; CHECK-NEXT: i32.const $push77=, 15 +; CHECK-NEXT: i32.and $push42=, $pop41, $pop77 ; CHECK-NEXT: i32.or $push43=, $2, $pop42 -; CHECK-NEXT: i32.load8_u $push44=, 0($pop43) -; CHECK-NEXT: i8x16.replace_lane $push69=, $pop68, 5, $pop44 -; CHECK-NEXT: i8x16.extract_lane_u $push37=, $1, 6 -; CHECK-NEXT: i32.const $push91=, 15 -; CHECK-NEXT: i32.and $push38=, $pop37, $pop91 +; CHECK-NEXT: i8x16.extract_lane_u $push37=, $1, 9 +; CHECK-NEXT: i32.const $push76=, 15 +; CHECK-NEXT: i32.and $push38=, $pop37, $pop76 ; CHECK-NEXT: i32.or $push39=, $2, $pop38 -; CHECK-NEXT: i32.load8_u $push40=, 0($pop39) -; CHECK-NEXT: i8x16.replace_lane $push70=, $pop69, 6, $pop40 -; CHECK-NEXT: i8x16.extract_lane_u $push33=, $1, 7 -; CHECK-NEXT: i32.const $push90=, 15 -; CHECK-NEXT: i32.and $push34=, $pop33, $pop90 +; CHECK-NEXT: i8x16.extract_lane_u $push33=, $1, 8 +; CHECK-NEXT: i32.const $push75=, 15 +; 
CHECK-NEXT: i32.and $push34=, $pop33, $pop75 ; CHECK-NEXT: i32.or $push35=, $2, $pop34 -; CHECK-NEXT: i32.load8_u $push36=, 0($pop35) -; CHECK-NEXT: i8x16.replace_lane $push71=, $pop70, 7, $pop36 -; CHECK-NEXT: i8x16.extract_lane_u $push29=, $1, 8 -; CHECK-NEXT: i32.const $push89=, 15 -; CHECK-NEXT: i32.and $push30=, $pop29, $pop89 +; CHECK-NEXT: i8x16.extract_lane_u $push29=, $1, 7 +; CHECK-NEXT: i32.const $push74=, 15 +; CHECK-NEXT: i32.and $push30=, $pop29, $pop74 ; CHECK-NEXT: i32.or $push31=, $2, $pop30 -; CHECK-NEXT: i32.load8_u $push32=, 0($pop31) -; CHECK-NEXT: i8x16.replace_lane $push72=, $pop71, 8, $pop32 -; CHECK-NEXT: i8x16.extract_lane_u $push25=, $1, 9 -; CHECK-NEXT: i32.const $push88=, 15 -; CHECK-NEXT: i32.and $push26=, $pop25, $pop88 +; CHECK-NEXT: i8x16.extract_lane_u $push25=, $1, 6 +; CHECK-NEXT: i32.const $push73=, 15 +; CHECK-NEXT: i32.and $push26=, $pop25, $pop73 ; CHECK-NEXT: i32.or $push27=, $2, $pop26 -; CHECK-NEXT: i32.load8_u $push28=, 0($pop27) -; CHECK-NEXT: i8x16.replace_lane $push73=, $pop72, 9, $pop28 -; CHECK-NEXT: i8x16.extract_lane_u $push21=, $1, 10 -; CHECK-NEXT: i32.const $push87=, 15 -; CHECK-NEXT: i32.and $push22=, $pop21, $pop87 +; CHECK-NEXT: i8x16.extract_lane_u $push21=, $1, 5 +; CHECK-NEXT: i32.const $push72=, 15 +; CHECK-NEXT: i32.and $push22=, $pop21, $pop72 ; CHECK-NEXT: i32.or $push23=, $2, $pop22 -; CHECK-NEXT: i32.load8_u $push24=, 0($pop23) -; CHECK-NEXT: i8x16.replace_lane $push74=, $pop73, 10, $pop24 -; CHECK-NEXT: i8x16.extract_lane_u $push17=, $1, 11 -; CHECK-NEXT: i32.const $push86=, 15 -; CHECK-NEXT: i32.and $push18=, $pop17, $pop86 +; CHECK-NEXT: i8x16.extract_lane_u $push17=, $1, 4 +; CHECK-NEXT: i32.const $push71=, 15 +; CHECK-NEXT: i32.and $push18=, $pop17, $pop71 ; CHECK-NEXT: i32.or $push19=, $2, $pop18 -; CHECK-NEXT: i32.load8_u $push20=, 0($pop19) -; CHECK-NEXT: i8x16.replace_lane $push75=, $pop74, 11, $pop20 -; CHECK-NEXT: i8x16.extract_lane_u $push13=, $1, 12 -; CHECK-NEXT: i32.const $push85=, 15 -; CHECK-NEXT: i32.and $push14=, $pop13, $pop85 +; CHECK-NEXT: i8x16.extract_lane_u $push13=, $1, 3 +; CHECK-NEXT: i32.const $push70=, 15 +; CHECK-NEXT: i32.and $push14=, $pop13, $pop70 ; CHECK-NEXT: i32.or $push15=, $2, $pop14 -; CHECK-NEXT: i32.load8_u $push16=, 0($pop15) -; CHECK-NEXT: i8x16.replace_lane $push76=, $pop75, 12, $pop16 -; CHECK-NEXT: i8x16.extract_lane_u $push9=, $1, 13 -; CHECK-NEXT: i32.const $push84=, 15 -; CHECK-NEXT: i32.and $push10=, $pop9, $pop84 +; CHECK-NEXT: i8x16.extract_lane_u $push9=, $1, 2 +; CHECK-NEXT: i32.const $push69=, 15 +; CHECK-NEXT: i32.and $push10=, $pop9, $pop69 ; CHECK-NEXT: i32.or $push11=, $2, $pop10 -; CHECK-NEXT: i32.load8_u $push12=, 0($pop11) -; CHECK-NEXT: i8x16.replace_lane $push77=, $pop76, 13, $pop12 -; CHECK-NEXT: i8x16.extract_lane_u $push5=, $1, 14 -; CHECK-NEXT: i32.const $push83=, 15 -; CHECK-NEXT: i32.and $push6=, $pop5, $pop83 -; CHECK-NEXT: i32.or $push7=, $2, $pop6 -; CHECK-NEXT: i32.load8_u $push8=, 0($pop7) -; CHECK-NEXT: i8x16.replace_lane $push78=, $pop77, 14, $pop8 -; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 15 -; CHECK-NEXT: i32.const $push82=, 15 -; CHECK-NEXT: i32.and $push2=, $pop0, $pop82 +; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 1 +; CHECK-NEXT: i32.const $push68=, 15 +; CHECK-NEXT: i32.and $push2=, $pop0, $pop68 ; CHECK-NEXT: i32.or $push3=, $2, $pop2 -; CHECK-NEXT: i32.load8_u $push4=, 0($pop3) -; CHECK-NEXT: i8x16.replace_lane $push79=, $pop78, 15, $pop4 -; CHECK-NEXT: return $pop79 +; CHECK-NEXT: i8x16.extract_lane_u $push4=, $1, 0 +; 
CHECK-NEXT: i32.const $push67=, 15 +; CHECK-NEXT: i32.and $push5=, $pop4, $pop67 +; CHECK-NEXT: i32.or $push6=, $2, $pop5 +; CHECK-NEXT: v128.load8_splat $push7=, 0($pop6) +; CHECK-NEXT: v128.load8_lane $push8=, 0($pop3), $pop7, 1 +; CHECK-NEXT: v128.load8_lane $push12=, 0($pop11), $pop8, 2 +; CHECK-NEXT: v128.load8_lane $push16=, 0($pop15), $pop12, 3 +; CHECK-NEXT: v128.load8_lane $push20=, 0($pop19), $pop16, 4 +; CHECK-NEXT: v128.load8_lane $push24=, 0($pop23), $pop20, 5 +; CHECK-NEXT: v128.load8_lane $push28=, 0($pop27), $pop24, 6 +; CHECK-NEXT: v128.load8_lane $push32=, 0($pop31), $pop28, 7 +; CHECK-NEXT: v128.load8_lane $push36=, 0($pop35), $pop32, 8 +; CHECK-NEXT: v128.load8_lane $push40=, 0($pop39), $pop36, 9 +; CHECK-NEXT: v128.load8_lane $push44=, 0($pop43), $pop40, 10 +; CHECK-NEXT: v128.load8_lane $push48=, 0($pop47), $pop44, 11 +; CHECK-NEXT: v128.load8_lane $push52=, 0($pop51), $pop48, 12 +; CHECK-NEXT: v128.load8_lane $push56=, 0($pop55), $pop52, 13 +; CHECK-NEXT: v128.load8_lane $push60=, 0($pop59), $pop56, 14 +; CHECK-NEXT: v128.load8_lane $push64=, 0($pop63), $pop60, 15 +; CHECK-NEXT: return $pop64 %m0 = extractelement <16 x i8> %mask, i32 0 %s0 = extractelement <16 x i8> %src, i8 %m0 %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0 @@ -423,8 +408,9 @@ ; CHECK-LABEL: undef_const_insert_f32x4: ; CHECK: .functype undef_const_insert_f32x4 () -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const $push0=, 0x0p0, 0x1.5p5, 0x0p0, 0x0p0 -; CHECK-NEXT: return $pop0 +; CHECK-NEXT: f32.const $push0=, 0x1.5p5 +; CHECK-NEXT: f32x4.splat $push1=, $pop0 +; CHECK-NEXT: return $pop1 %v = insertelement <4 x float> undef, float 42., i32 1 ret <4 x float> %v } diff --git a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll --- a/llvm/test/CodeGen/WebAssembly/simd-conversions.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-conversions.ll @@ -49,16 +49,20 @@ ; CHECK: .functype convert_u_v2f64 (v128) -> (v128) ; CHECK-NEXT: # %bb.0: ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.const 4294967295, 4294967295 +; CHECK-NEXT: i64.const 4294967295 +; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: v128.and -; CHECK-NEXT: v128.const 4841369599423283200, 4841369599423283200 +; CHECK-NEXT: i64.const 4841369599423283200 +; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: v128.or ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 32 ; CHECK-NEXT: i64x2.shr_u -; CHECK-NEXT: v128.const 4985484787499139072, 4985484787499139072 +; CHECK-NEXT: i64.const 4985484787499139072 +; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: v128.or -; CHECK-NEXT: v128.const 0x1.00000001p84, 0x1.00000001p84 +; CHECK-NEXT: f64.const 0x1.00000001p84 +; CHECK-NEXT: f64x2.splat ; CHECK-NEXT: f64x2.sub ; CHECK-NEXT: f64x2.add ; CHECK-NEXT: # fallthrough-return diff --git a/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll b/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll --- a/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-load-splat.ll @@ -7,10 +7,11 @@ ; CHECK-LABEL: load_splat: ; CHECK-NEXT: .functype load_splat (i32, i32) -> (i32) -; CHECK-NEXT: i32.load8_u $[[E:[0-9]+]]=, 0($0){{$}} -; CHECK-NEXT: v128.load8_splat $push[[V:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: i32.load8_u $push[[E:[0-9]+]]=, 0($0){{$}} +; CHECK-NEXT: local.tee $push[[T:[0-9]+]]=, $[[R:[0-9]+]]=, $pop[[E]]{{$}} +; CHECK-NEXT: i8x16.splat $push[[V:[0-9]+]]=, $pop[[T]]{{$}} ; CHECK-NEXT: v128.store 0($1), $pop[[V]]{{$}} -; CHECK-NEXT: return $[[E]]{{$}} +; CHECK-NEXT: return $[[R]]{{$}} define i8 
@load_splat(i8* %p, <16 x i8>* %out) { %e = load i8, i8* %p %v1 = insertelement <16 x i8> undef, i8 %e, i32 0 diff --git a/llvm/test/CodeGen/WebAssembly/simd-offset.ll b/llvm/test/CodeGen/WebAssembly/simd-offset.ll --- a/llvm/test/CodeGen/WebAssembly/simd-offset.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-offset.ll @@ -1207,7 +1207,8 @@ ; CHECK-LABEL: load_zext_v4i8_to_v4i32: ; CHECK: .functype load_zext_v4i8_to_v4i32 (i32) -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i8x16.splat ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.load32_zero 0 ; CHECK-NEXT: i8x16.shuffle 16, 1, 2, 3, 17, 5, 6, 7, 18, 9, 10, 11, 19, 13, 14, 15 @@ -1314,7 +1315,8 @@ ; CHECK-LABEL: load_zext_v4i8_to_v4i32_with_folded_offset: ; CHECK: .functype load_zext_v4i8_to_v4i32_with_folded_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i8x16.splat ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: v128.load32_zero 16 ; CHECK-NEXT: i8x16.shuffle 16, 1, 2, 3, 17, 5, 6, 7, 18, 9, 10, 11, 19, 13, 14, 15 @@ -1417,11 +1419,12 @@ ; CHECK-LABEL: load_zext_v4i8_to_v4i32_with_folded_gep_offset: ; CHECK: .functype load_zext_v4i8_to_v4i32_with_folded_gep_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.load32_zero 4 -; CHECK-NEXT: i8x16.shuffle 16, 1, 2, 3, 17, 5, 6, 7, 18, 9, 10, 11, 19, 13, 14, 15 -; CHECK-NEXT: # fallthrough-return +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i8x16.splat +; CHECK-NEXT: local.get 0 +; CHECK-NEXT: v128.load32_zero 4 +; CHECK-NEXT: i8x16.shuffle 16, 1, 2, 3, 17, 5, 6, 7, 18, 9, 10, 11, 19, 13, 14, 15 +; CHECK-NEXT: # fallthrough-return %s = getelementptr inbounds <4 x i8>, <4 x i8>* %p, i32 1 %v = load <4 x i8>, <4 x i8>* %s %v2 = zext <4 x i8> %v to <4 x i32> @@ -1526,7 +1529,8 @@ ; CHECK-LABEL: load_zext_v4i8_to_v4i32_with_unfolded_gep_negative_offset: ; CHECK: .functype load_zext_v4i8_to_v4i32_with_unfolded_gep_negative_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i8x16.splat ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const -4 ; CHECK-NEXT: i32.add @@ -1649,7 +1653,8 @@ ; CHECK-LABEL: load_zext_v4i8_to_v4i32_with_unfolded_offset: ; CHECK: .functype load_zext_v4i8_to_v4i32_with_unfolded_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i8x16.splat ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 16 ; CHECK-NEXT: i32.add @@ -1766,7 +1771,8 @@ ; CHECK-LABEL: load_zext_v4i8_to_v4i32_with_unfolded_gep_offset: ; CHECK: .functype load_zext_v4i8_to_v4i32_with_unfolded_gep_offset (i32) -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i8x16.splat ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: i32.const 4 ; CHECK-NEXT: i32.add @@ -1869,7 +1875,8 @@ ; CHECK-LABEL: load_zext_v4i8_to_v4i32_from_numeric_address: ; CHECK: .functype load_zext_v4i8_to_v4i32_from_numeric_address () -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i8x16.splat ; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: 
v128.load32_zero 32 ; CHECK-NEXT: i8x16.shuffle 16, 1, 2, 3, 17, 5, 6, 7, 18, 9, 10, 11, 19, 13, 14, 15 @@ -1967,7 +1974,8 @@ ; CHECK-LABEL: load_zext_v4i8_to_v4i32_from_global_address: ; CHECK: .functype load_zext_v4i8_to_v4i32_from_global_address () -> (v128) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: v128.const 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; CHECK-NEXT: i32.const 0 +; CHECK-NEXT: i8x16.splat ; CHECK-NEXT: i32.const 0 ; CHECK-NEXT: v128.load32_zero gv_v4i8 ; CHECK-NEXT: i8x16.shuffle 16, 1, 2, 3, 17, 5, 6, 7, 18, 9, 10, 11, 19, 13, 14, 15 diff --git a/llvm/test/CodeGen/WebAssembly/simd-pr51605.ll b/llvm/test/CodeGen/WebAssembly/simd-pr51605.ll --- a/llvm/test/CodeGen/WebAssembly/simd-pr51605.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-pr51605.ll @@ -13,11 +13,10 @@ ; CHECK-LABEL: test_i8: ; CHECK: .functype test_i8 (v128) -> (v128) ; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 1 +; CHECK-NEXT: i8x16.splat ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.const -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK-NEXT: v128.xor -; CHECK-NEXT: v128.const 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK-NEXT: v128.and +; CHECK-NEXT: v128.andnot ; CHECK-NEXT: # fallthrough-return %c = and <4 x i8> %b, <i8 1, i8 1, i8 1, i8 1> %d = xor <4 x i8> %c, <i8 1, i8 1, i8 1, i8 1> @@ -28,11 +27,10 @@ ; CHECK-LABEL: test_i16: ; CHECK: .functype test_i16 (v128) -> (v128) ; CHECK-NEXT: # %bb.0: +; CHECK-NEXT: i32.const 1 +; CHECK-NEXT: i16x8.splat ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.const -1, -1, -1, -1, 0, 0, 0, 0 -; CHECK-NEXT: v128.xor -; CHECK-NEXT: v128.const 1, 1, 1, 1, 0, 0, 0, 0 -; CHECK-NEXT: v128.and +; CHECK-NEXT: v128.andnot ; CHECK-NEXT: # fallthrough-return %c = and <4 x i16> %b, <i16 1, i16 1, i16 1, i16 1> %d = xor <4 x i16> %c, <i16 1, i16 1, i16 1, i16 1> diff --git a/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll b/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll --- a/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-shift-complex-splats.ll @@ -47,11 +47,10 @@ ; CHECK-NEXT: i8x16.splat $push1=, $1 ; CHECK-NEXT: i8x16.splat $push0=, $2 ; CHECK-NEXT: i8x16.add $push2=, $pop1, $pop0 -; CHECK-NEXT: i8x16.shuffle $push3=, $pop2, $0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 -; CHECK-NEXT: i8x16.abs $push4=, $pop3 -; CHECK-NEXT: i8x16.extract_lane_u $push5=, $pop4, 0 -; CHECK-NEXT: i8x16.shl $push6=, $0, $pop5 -; CHECK-NEXT: return $pop6 +; CHECK-NEXT: i8x16.abs $push3=, $pop2 +; CHECK-NEXT: i8x16.extract_lane_u $push4=, $pop3, 0 +; CHECK-NEXT: i8x16.shl $push5=, $0, $pop4 +; CHECK-NEXT: return $pop5 define <16 x i8> @shl_abs_add(<16 x i8> %v, i8 %a, i8 %b) { %t1 = insertelement <16 x i8> undef, i8 %a, i32 0 %va = shufflevector <16 x i8> %t1, <16 x i8> undef, <16 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/WebAssembly/simd-vector-trunc.ll b/llvm/test/CodeGen/WebAssembly/simd-vector-trunc.ll --- a/llvm/test/CodeGen/WebAssembly/simd-vector-trunc.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-vector-trunc.ll @@ -11,7 +11,8 @@ ; CHECK-NEXT: .local v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.const 255, 255 +; CHECK-NEXT: i64.const 255 +; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: local.tee 8 ; CHECK-NEXT: v128.and ; CHECK-NEXT: local.get 1 @@ -54,7 +55,8 @@ ; CHECK-NEXT: .local v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.const 255, 255, 255, 255 +; CHECK-NEXT: i32.const 255 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.tee 4 ; CHECK-NEXT: v128.and ; CHECK-NEXT: local.get 1 @@ -81,7 +83,8 @@ ; CHECK-NEXT:
.local v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.const 255, 255, 255, 255, 255, 255, 255, 255 +; CHECK-NEXT: i32.const 255 +; CHECK-NEXT: i16x8.splat ; CHECK-NEXT: local.tee 2 ; CHECK-NEXT: v128.and ; CHECK-NEXT: local.get 1 @@ -100,7 +103,8 @@ ; CHECK-NEXT: .local v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.const 65535, 65535 +; CHECK-NEXT: i64.const 65535 +; CHECK-NEXT: i64x2.splat ; CHECK-NEXT: local.tee 4 ; CHECK-NEXT: v128.and ; CHECK-NEXT: local.get 1 @@ -127,7 +131,8 @@ ; CHECK-NEXT: .local v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535 +; CHECK-NEXT: i32.const 65535 +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.tee 2 ; CHECK-NEXT: v128.and ; CHECK-NEXT: local.get 1 diff --git a/llvm/test/CodeGen/WebAssembly/simd.ll b/llvm/test/CodeGen/WebAssembly/simd.ll --- a/llvm/test/CodeGen/WebAssembly/simd.ll +++ b/llvm/test/CodeGen/WebAssembly/simd.ll @@ -33,7 +33,9 @@ } ; CHECK-LABEL: const_splat_v16i8: -; SIMD128: v128.const $push0=, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42{{$}} +; SIMD128: i32.const $push0=, 42{{$}} +; SIMD128: i8x16.splat $push1=, $pop0{{$}} +; SIMD128: return $pop1{{$}} define <16 x i8> @const_splat_v16i8() { ret <16 x i8> <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42> @@ -295,7 +297,9 @@ } ; CHECK-LABEL: const_splat_v8i16: -; SIMD128: v128.const $push0=, 42, 42, 42, 42, 42, 42, 42, 42{{$}} +; SIMD128: i32.const $push0=, 42{{$}} +; SIMD128: i16x8.splat $push1=, $pop0{{$}} +; SIMD128: return $pop1{{$}} define <8 x i16> @const_splat_v8i16() { ret <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42> } @@ -542,7 +546,9 @@ } ; CHECK-LABEL: const_splat_v4i32: -; SIMD128: v128.const $push0=, 42, 42, 42, 42{{$}} +; SIMD128: i32.const $push0=, 42{{$}} +; SIMD128: i32x4.splat $push1=, $pop0{{$}} +; SIMD128: return $pop1{{$}} define <4 x i32> @const_splat_v4i32() { ret <4 x i32> <i32 42, i32 42, i32 42, i32 42> } @@ -692,7 +698,9 @@ } ; CHECK-LABEL: const_splat_v2i64: -; SIMD128: v128.const $push0=, 42, 42{{$}} +; SIMD128: i64.const $push0=, 42{{$}} +; SIMD128: i64x2.splat $push1=, $pop0{{$}} +; SIMD128: return $pop1{{$}} define <2 x i64> @const_splat_v2i64() { ret <2 x i64> <i64 42, i64 42> } @@ -840,7 +848,9 @@ } ; CHECK-LABEL: const_splat_v4f32 -; SIMD128: v128.const $push0=, 0x1.5p5, 0x1.5p5, 0x1.5p5, 0x1.5p5{{$}} +; SIMD128: f32.const $push0=, 0x1.5p5{{$}} +; SIMD128: f32x4.splat $push1=, $pop0{{$}} +; SIMD128: return $pop1{{$}} define <4 x float> @const_splat_v4f32() { ret <4 x float> <float 42., float 42., float 42., float 42.> } @@ -990,7 +1000,9 @@ } ; CHECK-LABEL: const_splat_v2f64: -; SIMD128: v128.const $push0=, 0x1.5p5, 0x1.5p5{{$}} +; SIMD128: f64.const $push0=, 0x1.5p5{{$}} +; SIMD128: f64x2.splat $push1=, $pop0{{$}} +; SIMD128: return $pop1{{$}} define <2 x double> @const_splat_v2f64() { ret <2 x double> <double 42., double 42.> }