diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def --- a/llvm/lib/Target/WebAssembly/WebAssemblyISD.def +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISD.def @@ -26,6 +26,7 @@ HANDLE_NODETYPE(BR_IF) HANDLE_NODETYPE(BR_TABLE) HANDLE_NODETYPE(SHUFFLE) +HANDLE_NODETYPE(SWIZZLE) HANDLE_NODETYPE(VEC_SHL) HANDLE_NODETYPE(VEC_SHR_S) HANDLE_NODETYPE(VEC_SHR_U) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp --- a/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -1292,68 +1292,116 @@ const EVT VecT = Op.getValueType(); const EVT LaneT = Op.getOperand(0).getValueType(); const size_t Lanes = Op.getNumOperands(); + bool CanSwizzle = Subtarget->hasUnimplementedSIMD128() && VecT == MVT::v16i8; + + // BUILD_VECTORs are lowered to the instruction that initializes the highest + // possible number of lanes at once followed by a sequence of replace_lane + // instructions to individually initialize any remaining lanes. + + // TODO: Tune this. For example, lanewise swizzling is very expensive, so + // swizzled lanes should be given greater weight. + + // TODO: Investigate building vectors by shuffling together vectors built by + // separately specialized means. + auto IsConstant = [](const SDValue &V) { return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP; }; - // Find the most common operand, which is approximately the best to splat - using Entry = std::pair; - SmallVector ValueCounts; - size_t NumConst = 0, NumDynamic = 0; - for (const SDValue &Lane : Op->op_values()) { - if (Lane.isUndef()) { - continue; - } else if (IsConstant(Lane)) { - NumConst++; - } else { - NumDynamic++; - } - auto CountIt = std::find_if(ValueCounts.begin(), ValueCounts.end(), - [&Lane](Entry A) { return A.first == Lane; }); - if (CountIt == ValueCounts.end()) { - ValueCounts.emplace_back(Lane, 1); + // Returns the source vector and index vector pair if they exist. Checks for: + // (extract_vector_elt + // $src, + // (sign_extend_inreg (extract_vector_elt $indices, $i)) + // ) + auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) { + auto Bail = std::make_pair(SDValue(), SDValue()); + if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return Bail; + const SDValue &SrcVec = Lane->getOperand(0); + const SDValue &IndexExt = Lane->getOperand(1); + if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG) + return Bail; + const SDValue &Index = IndexExt->getOperand(0); + if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return Bail; + const SDValue &IndexVec = Index->getOperand(0); + if (SrcVec.getValueType() != MVT::v16i8 || + IndexVec.getValueType() != MVT::v16i8 || + Index->getOperand(1)->getOpcode() != ISD::Constant || + Index->getConstantOperandVal(1) != I) + return Bail; + return std::make_pair(SrcVec, IndexVec); + }; + + using ValueEntry = std::pair; + SmallVector SplatValueCounts; + + using SwizzleEntry = std::pair, size_t>; + SmallVector SwizzleCounts; + + auto AddCount = [](auto &Counts, const auto &Val) { + auto CountIt = std::find_if(Counts.begin(), Counts.end(), + [&Val](auto E) { return E.first == Val; }); + if (CountIt == Counts.end()) { + Counts.emplace_back(Val, 1); } else { CountIt->second++; } + }; + + auto GetMostCommon = [](auto &Counts) { + auto CommonIt = + std::max_element(Counts.begin(), Counts.end(), + [](auto A, auto B) { return A.second < B.second; }); + assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector"); + return *CommonIt; + }; + + size_t NumConstantLanes = 0; + + // Count eligible lanes for each type of vector creation op + for (size_t I = 0; I < Lanes; ++I) { + const SDValue &Lane = Op->getOperand(I); + if (Lane.isUndef()) + continue; + + AddCount(SplatValueCounts, Lane); + + if (IsConstant(Lane)) { + NumConstantLanes++; + } else if (CanSwizzle) { + auto SwizzleSrcs = GetSwizzleSrcs(I, Lane); + if (SwizzleSrcs.first) + AddCount(SwizzleCounts, SwizzleSrcs); + } } - auto CommonIt = - std::max_element(ValueCounts.begin(), ValueCounts.end(), - [](Entry A, Entry B) { return A.second < B.second; }); - assert(CommonIt != ValueCounts.end() && "Unexpected all-undef build_vector"); - SDValue SplatValue = CommonIt->first; - size_t NumCommon = CommonIt->second; - - // If v128.const is available, consider using it instead of a splat + + SDValue SplatValue; + size_t NumSplatLanes; + std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts); + + SDValue SwizzleSrc; + SDValue SwizzleIndices; + size_t NumSwizzleLanes = 0; + if (SwizzleCounts.size()) + std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices), + NumSwizzleLanes) = GetMostCommon(SwizzleCounts); + + // Predicate returning true if the lane is properly initialized by the + // original instruction + std::function IsLaneConstructed; + SDValue Result; if (Subtarget->hasUnimplementedSIMD128()) { - // {i32,i64,f32,f64}.const opcode, and value - const size_t ConstBytes = 1 + std::max(size_t(4), 16 / Lanes); - // SIMD prefix and opcode - const size_t SplatBytes = 2; - const size_t SplatConstBytes = SplatBytes + ConstBytes; - // SIMD prefix, opcode, and lane index - const size_t ReplaceBytes = 3; - const size_t ReplaceConstBytes = ReplaceBytes + ConstBytes; - // SIMD prefix, v128.const opcode, and 128-bit value - const size_t VecConstBytes = 18; - // Initial v128.const and a replace_lane for each non-const operand - const size_t ConstInitBytes = VecConstBytes + NumDynamic * ReplaceBytes; - // Initial splat and all necessary replace_lanes - const size_t SplatInitBytes = - IsConstant(SplatValue) - // Initial constant splat - ? (SplatConstBytes + - // Constant replace_lanes - (NumConst - NumCommon) * ReplaceConstBytes + - // Dynamic replace_lanes - (NumDynamic * ReplaceBytes)) - // Initial dynamic splat - : (SplatBytes + - // Constant replace_lanes - (NumConst * ReplaceConstBytes) + - // Dynamic replace_lanes - (NumDynamic - NumCommon) * ReplaceBytes); - if (ConstInitBytes < SplatInitBytes) { - // Create build_vector that will lower to initial v128.const + // Prefer swizzles over vector consts over splats + if (NumSwizzleLanes >= NumSplatLanes && + NumSwizzleLanes >= NumConstantLanes) { + Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc, + SwizzleIndices); + auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices); + IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) { + return Swizzled == GetSwizzleSrcs(I, Lane); + }; + } else if (NumConstantLanes >= NumSplatLanes) { SmallVector ConstLanes; for (const SDValue &Lane : Op->op_values()) { if (IsConstant(Lane)) { @@ -1364,35 +1412,35 @@ ConstLanes.push_back(DAG.getConstant(0, DL, LaneT)); } } - SDValue Result = DAG.getBuildVector(VecT, DL, ConstLanes); - // Add replace_lane instructions for non-const lanes - for (size_t I = 0; I < Lanes; ++I) { - const SDValue &Lane = Op->getOperand(I); - if (!Lane.isUndef() && !IsConstant(Lane)) - Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane, - DAG.getConstant(I, DL, MVT::i32)); - } - return Result; + Result = DAG.getBuildVector(VecT, DL, ConstLanes); + IsLaneConstructed = [&](size_t _, const SDValue &Lane) { + return IsConstant(Lane); + }; } } - // Use a splat for the initial vector - SDValue Result; - // Possibly a load_splat - LoadSDNode *SplattedLoad; - if (Subtarget->hasUnimplementedSIMD128() && - (SplattedLoad = dyn_cast(SplatValue)) && - SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) { - Result = DAG.getNode(WebAssemblyISD::LOAD_SPLAT, DL, VecT, SplatValue); - } else { - Result = DAG.getSplatBuildVector(VecT, DL, SplatValue); + if (!Result) { + // Use a splat, but possibly a load_splat + LoadSDNode *SplattedLoad; + if (Subtarget->hasUnimplementedSIMD128() && + (SplattedLoad = dyn_cast(SplatValue)) && + SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) { + Result = DAG.getNode(WebAssemblyISD::LOAD_SPLAT, DL, VecT, SplatValue); + } else { + Result = DAG.getSplatBuildVector(VecT, DL, SplatValue); + } + IsLaneConstructed = [&](size_t _, const SDValue &Lane) { + return Lane == SplatValue; + }; } - // Add replace_lane instructions for other values + + // Add replace_lane instructions for any unhandled values for (size_t I = 0; I < Lanes; ++I) { const SDValue &Lane = Op->getOperand(I); - if (Lane != SplatValue) + if (!Lane.isUndef() && !IsLaneConstructed(I, Lane)) Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane, DAG.getConstant(I, DL, MVT::i32)); } + return Result; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -275,6 +275,15 @@ (i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF)))>; } +// Swizzle lanes: v8x16.swizzle +def wasm_swizzle_t : SDTypeProfile<1, 2, []>; +def wasm_swizzle : SDNode<"WebAssemblyISD::SWIZZLE", wasm_swizzle_t>; +defm SWIZZLE : + SIMD_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins), + [(set (v16i8 V128:$dst), + (wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))], + "v8x16.swizzle\t$dst, $src, $mask", "v8x16.swizzle", 192>; + // Create vector with identical lanes: splat def splat2 : PatFrag<(ops node:$x), (build_vector node:$x, node:$x)>; def splat4 : PatFrag<(ops node:$x), (build_vector diff --git a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll --- a/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-build-vector.ll @@ -7,13 +7,12 @@ target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown" -; CHECK-LABEL: same_const_one_replaced_i8x16: -; CHECK-NEXT: .functype same_const_one_replaced_i8x16 (i32) -> (v128) -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 42 -; CHECK-NEXT: i16x8.splat $push[[L1:[0-9]+]]=, $pop[[L0]] -; CHECK-NEXT: i16x8.replace_lane $push[[L2:[0-9]+]]=, $pop[[L1]], 5, $0 -; CHECK-NEXT: return $pop[[L2]] -define <8 x i16> @same_const_one_replaced_i8x16(i16 %x) { +; CHECK-LABEL: same_const_one_replaced_i16x8: +; CHECK-NEXT: .functype same_const_one_replaced_i16x8 (i32) -> (v128) +; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 42, 42, 42, 42, 42, 0, 42, 42 +; CHECK-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0 +; CHECK-NEXT: return $pop[[L1]] +define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) { %v = insertelement <8 x i16> , i16 %x, @@ -21,12 +20,12 @@ ret <8 x i16> %v } -; CHECK-LABEL: different_const_one_replaced_i8x16: -; CHECK-NEXT: .functype different_const_one_replaced_i8x16 (i32) -> (v128) +; CHECK-LABEL: different_const_one_replaced_i16x8: +; CHECK-NEXT: .functype different_const_one_replaced_i16x8 (i32) -> (v128) ; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 1, -2, 3, -4, 5, 0, 7, -8 ; CHECK-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0 ; CHECK-NEXT: return $pop[[L1]] -define <8 x i16> @different_const_one_replaced_i8x16(i16 %x) { +define <8 x i16> @different_const_one_replaced_i16x8(i16 %x) { %v = insertelement <8 x i16> , i16 %x, @@ -36,10 +35,9 @@ ; CHECK-LABEL: same_const_one_replaced_f32x4: ; CHECK-NEXT: .functype same_const_one_replaced_f32x4 (f32) -> (v128) -; CHECK-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.5p5 -; CHECK-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]] -; CHECK-NEXT: f32x4.replace_lane $push[[L2:[0-9]+]]=, $pop[[L1]], 2, $0 -; CHECK-NEXT: return $pop[[L2]] +; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.5p5, 0x1.5p5, 0x0p0, 0x1.5p5 +; CHECK-NEXT: f32x4.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 2, $0 +; CHECK-NEXT: return $pop[[L1]] define <4 x float> @same_const_one_replaced_f32x4(float %x) { %v = insertelement <4 x float> , @@ -63,11 +61,8 @@ ; CHECK-LABEL: splat_common_const_i32x4: ; CHECK-NEXT: .functype splat_common_const_i32x4 () -> (v128) -; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 3 -; CHECK-NEXT: i32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]] -; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 1 -; CHECK-NEXT: i32x4.replace_lane $push[[L3:[0-9]+]]=, $pop[[L1]], 3, $pop[[L2]] -; CHECK-NEXT: return $pop[[L3]] +; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 0, 3, 3, 1 +; CHECK-NEXT: return $pop[[L0]] define <4 x i32> @splat_common_const_i32x4() { ret <4 x i32> } @@ -92,11 +87,159 @@ ret <8 x i16> %v7 } +; CHECK-LABEL: swizzle_one_i8x16: +; CHECK-NEXT: .functype swizzle_one_i8x16 (v128, v128) -> (v128) +; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1 +; CHECK-NEXT: return $pop[[L0]] +define <16 x i8> @swizzle_one_i8x16(<16 x i8> %src, <16 x i8> %mask) { + %m0 = extractelement <16 x i8> %mask, i32 0 + %s0 = extractelement <16 x i8> %src, i8 %m0 + %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0 + ret <16 x i8> %v0 +} + +; CHECK-LABEL: swizzle_all_i8x16: +; CHECK-NEXT: .functype swizzle_all_i8x16 (v128, v128) -> (v128) +; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1 +; CHECK-NEXT: return $pop[[L0]] +define <16 x i8> @swizzle_all_i8x16(<16 x i8> %src, <16 x i8> %mask) { + %m0 = extractelement <16 x i8> %mask, i32 0 + %s0 = extractelement <16 x i8> %src, i8 %m0 + %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0 + %m1 = extractelement <16 x i8> %mask, i32 1 + %s1 = extractelement <16 x i8> %src, i8 %m1 + %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 1 + %m2 = extractelement <16 x i8> %mask, i32 2 + %s2 = extractelement <16 x i8> %src, i8 %m2 + %v2 = insertelement <16 x i8> %v1, i8 %s2, i32 2 + %m3 = extractelement <16 x i8> %mask, i32 3 + %s3 = extractelement <16 x i8> %src, i8 %m3 + %v3 = insertelement <16 x i8> %v2, i8 %s3, i32 3 + %m4 = extractelement <16 x i8> %mask, i32 4 + %s4 = extractelement <16 x i8> %src, i8 %m4 + %v4 = insertelement <16 x i8> %v3, i8 %s4, i32 4 + %m5 = extractelement <16 x i8> %mask, i32 5 + %s5 = extractelement <16 x i8> %src, i8 %m5 + %v5 = insertelement <16 x i8> %v4, i8 %s5, i32 5 + %m6 = extractelement <16 x i8> %mask, i32 6 + %s6 = extractelement <16 x i8> %src, i8 %m6 + %v6 = insertelement <16 x i8> %v5, i8 %s6, i32 6 + %m7 = extractelement <16 x i8> %mask, i32 7 + %s7 = extractelement <16 x i8> %src, i8 %m7 + %v7 = insertelement <16 x i8> %v6, i8 %s7, i32 7 + %m8 = extractelement <16 x i8> %mask, i32 8 + %s8 = extractelement <16 x i8> %src, i8 %m8 + %v8 = insertelement <16 x i8> %v7, i8 %s8, i32 8 + %m9 = extractelement <16 x i8> %mask, i32 9 + %s9 = extractelement <16 x i8> %src, i8 %m9 + %v9 = insertelement <16 x i8> %v8, i8 %s9, i32 9 + %m10 = extractelement <16 x i8> %mask, i32 10 + %s10 = extractelement <16 x i8> %src, i8 %m10 + %v10 = insertelement <16 x i8> %v9, i8 %s10, i32 10 + %m11 = extractelement <16 x i8> %mask, i32 11 + %s11 = extractelement <16 x i8> %src, i8 %m11 + %v11 = insertelement <16 x i8> %v10, i8 %s11, i32 11 + %m12 = extractelement <16 x i8> %mask, i32 12 + %s12 = extractelement <16 x i8> %src, i8 %m12 + %v12 = insertelement <16 x i8> %v11, i8 %s12, i32 12 + %m13 = extractelement <16 x i8> %mask, i32 13 + %s13 = extractelement <16 x i8> %src, i8 %m13 + %v13 = insertelement <16 x i8> %v12, i8 %s13, i32 13 + %m14 = extractelement <16 x i8> %mask, i32 14 + %s14 = extractelement <16 x i8> %src, i8 %m14 + %v14 = insertelement <16 x i8> %v13, i8 %s14, i32 14 + %m15 = extractelement <16 x i8> %mask, i32 15 + %s15 = extractelement <16 x i8> %src, i8 %m15 + %v15 = insertelement <16 x i8> %v14, i8 %s15, i32 15 + ret <16 x i8> %v15 +} + +; CHECK-LABEL: swizzle_one_i16x8: +; CHECK-NEXT: .functype swizzle_one_i16x8 (v128, v128) -> (v128) +; CHECK-NOT: swizzle +; CHECK: return +define <8 x i16> @swizzle_one_i16x8(<8 x i16> %src, <8 x i16> %mask) { + %m0 = extractelement <8 x i16> %mask, i32 0 + %s0 = extractelement <8 x i16> %src, i16 %m0 + %v0 = insertelement <8 x i16> undef, i16 %s0, i32 0 + ret <8 x i16> %v0 +} + +; CHECK-LABEL: mashup_swizzle_i8x16: +; CHECK-NEXT: .functype mashup_swizzle_i8x16 (v128, v128, i32) -> (v128) +; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1 +; CHECK: i8x16.replace_lane +; CHECK: i8x16.replace_lane +; CHECK: i8x16.replace_lane +; CHECK: i8x16.replace_lane +; CHECK: return +define <16 x i8> @mashup_swizzle_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) { + ; swizzle 0 + %m0 = extractelement <16 x i8> %mask, i32 0 + %s0 = extractelement <16 x i8> %src, i8 %m0 + %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0 + ; swizzle 7 + %m1 = extractelement <16 x i8> %mask, i32 7 + %s1 = extractelement <16 x i8> %src, i8 %m1 + %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 7 + ; splat 3 + %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 3 + ; splat 12 + %v3 = insertelement <16 x i8> %v2, i8 %splatted, i32 12 + ; const 4 + %v4 = insertelement <16 x i8> %v3, i8 42, i32 4 + ; const 14 + %v5 = insertelement <16 x i8> %v4, i8 42, i32 14 + ret <16 x i8> %v5 +} + +; CHECK-LABEL: mashup_const_i8x16: +; CHECK-NEXT: .functype mashup_const_i8x16 (v128, v128, i32) -> (v128) +; CHECK: v128.const $push[[L0:[0-9]+]]=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0 +; CHECK: i8x16.replace_lane +; CHECK: i8x16.replace_lane +; CHECK: i8x16.replace_lane +; CHECK: return +define <16 x i8> @mashup_const_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) { + ; swizzle 0 + %m0 = extractelement <16 x i8> %mask, i32 0 + %s0 = extractelement <16 x i8> %src, i8 %m0 + %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0 + ; splat 3 + %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3 + ; splat 12 + %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12 + ; const 4 + %v3 = insertelement <16 x i8> %v2, i8 42, i32 4 + ; const 14 + %v4 = insertelement <16 x i8> %v3, i8 42, i32 14 + ret <16 x i8> %v4 +} + +; CHECK-LABEL: mashup_splat_i8x16: +; CHECK-NEXT: .functype mashup_splat_i8x16 (v128, v128, i32) -> (v128) +; CHECK: i8x16.splat $push[[L0:[0-9]+]]=, $2 +; CHECK: i8x16.replace_lane +; CHECK: i8x16.replace_lane +; CHECK: return +define <16 x i8> @mashup_splat_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) { + ; swizzle 0 + %m0 = extractelement <16 x i8> %mask, i32 0 + %s0 = extractelement <16 x i8> %src, i8 %m0 + %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0 + ; splat 3 + %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3 + ; splat 12 + %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12 + ; const 4 + %v3 = insertelement <16 x i8> %v2, i8 42, i32 4 + ret <16 x i8> %v3 +} + ; CHECK-LABEL: undef_const_insert_f32x4: ; CHECK-NEXT: .functype undef_const_insert_f32x4 () -> (v128) -; CHECK-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.5p5 -; CHECK-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]] -; CHECK-NEXT: return $pop[[L1]] +; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x0p0, 0x1.5p5, 0x0p0, 0x0p0 +; CHECK-NEXT: return $pop[[L0]] define <4 x float> @undef_const_insert_f32x4() { %v = insertelement <4 x float> undef, float 42., i32 1 ret <4 x float> %v diff --git a/llvm/test/MC/WebAssembly/simd-encodings.s b/llvm/test/MC/WebAssembly/simd-encodings.s --- a/llvm/test/MC/WebAssembly/simd-encodings.s +++ b/llvm/test/MC/WebAssembly/simd-encodings.s @@ -463,6 +463,9 @@ # CHECK: f64x2.convert_i64x2_u # encoding: [0xfd,0xb2,0x01] f64x2.convert_i64x2_u + # CHECK: v8x16.swizzle # encoding: [0xfd,0xc0,0x01] + v8x16.swizzle + # CHECK: v8x16.load_splat 48 # encoding: [0xfd,0xc2,0x01,0x00,0x30] v8x16.load_splat 48