Skip to content

Commit d5b7a4e

Browse files
committed Oct 9, 2019
[WebAssembly] v8x16.swizzle and rewrite BUILD_VECTOR lowering
Summary: Adds the new v8x16.swizzle SIMD instruction as specified at https://github.com/WebAssembly/simd/blob/master/proposals/simd/SIMD.md#swizzling-using-variable-indices. In addition to adding swizzles as a candidate lowering in LowerBUILD_VECTOR, this change also rewrites and simplifies the lowering to minimize the number of replace_lanes necessary rather than trying to minimize code size. This leads to more uses of v128.const instead of splats, which is expected to increase performance. The new code will be easier to tune once V8 implements all the vector construction operations, and it will also be easier to add new candidate instructions in the future if necessary.

Reviewers: aheejin, dschuff

Subscribers: sbc100, jgravelle-google, hiraditya, sunfish, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D68527

llvm-svn: 374188
1 parent 0115c10 commit d5b7a4e

File tree

5 files changed

+300
-96
lines changed

5 files changed

+300
-96
lines changed
 

‎llvm/lib/Target/WebAssembly/WebAssemblyISD.def

+1
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@ HANDLE_NODETYPE(WrapperPIC)
2626
HANDLE_NODETYPE(BR_IF)
2727
HANDLE_NODETYPE(BR_TABLE)
2828
HANDLE_NODETYPE(SHUFFLE)
29+
HANDLE_NODETYPE(SWIZZLE)
2930
HANDLE_NODETYPE(VEC_SHL)
3031
HANDLE_NODETYPE(VEC_SHR_S)
3132
HANDLE_NODETYPE(VEC_SHR_U)

‎llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp

+122-74
Original file line number | Diff line number | Diff line change
@@ -1292,68 +1292,116 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
12921292
const EVT VecT = Op.getValueType();
12931293
const EVT LaneT = Op.getOperand(0).getValueType();
12941294
const size_t Lanes = Op.getNumOperands();
1295+
bool CanSwizzle = Subtarget->hasUnimplementedSIMD128() && VecT == MVT::v16i8;
1296+
1297+
// BUILD_VECTORs are lowered to the instruction that initializes the highest
1298+
// possible number of lanes at once followed by a sequence of replace_lane
1299+
// instructions to individually initialize any remaining lanes.
1300+
1301+
// TODO: Tune this. For example, lanewise swizzling is very expensive, so
1302+
// swizzled lanes should be given greater weight.
1303+
1304+
// TODO: Investigate building vectors by shuffling together vectors built by
1305+
// separately specialized means.
1306+
12951307
auto IsConstant = [](const SDValue &V) {
12961308
return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
12971309
};
12981310

1299-
// Find the most common operand, which is approximately the best to splat
1300-
using Entry = std::pair<SDValue, size_t>;
1301-
SmallVector<Entry, 16> ValueCounts;
1302-
size_t NumConst = 0, NumDynamic = 0;
1303-
for (const SDValue &Lane : Op->op_values()) {
1304-
if (Lane.isUndef()) {
1305-
continue;
1306-
} else if (IsConstant(Lane)) {
1307-
NumConst++;
1308-
} else {
1309-
NumDynamic++;
1310-
}
1311-
auto CountIt = std::find_if(ValueCounts.begin(), ValueCounts.end(),
1312-
[&Lane](Entry A) { return A.first == Lane; });
1313-
if (CountIt == ValueCounts.end()) {
1314-
ValueCounts.emplace_back(Lane, 1);
1311+
// Returns the source vector and index vector pair if they exist. Checks for:
1312+
// (extract_vector_elt
1313+
// $src,
1314+
// (sign_extend_inreg (extract_vector_elt $indices, $i))
1315+
// )
1316+
auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
1317+
auto Bail = std::make_pair(SDValue(), SDValue());
1318+
if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1319+
return Bail;
1320+
const SDValue &SwizzleSrc = Lane->getOperand(0);
1321+
const SDValue &IndexExt = Lane->getOperand(1);
1322+
if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
1323+
return Bail;
1324+
const SDValue &Index = IndexExt->getOperand(0);
1325+
if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
1326+
return Bail;
1327+
const SDValue &SwizzleIndices = Index->getOperand(0);
1328+
if (SwizzleSrc.getValueType() != MVT::v16i8 ||
1329+
SwizzleIndices.getValueType() != MVT::v16i8 ||
1330+
Index->getOperand(1)->getOpcode() != ISD::Constant ||
1331+
Index->getConstantOperandVal(1) != I)
1332+
return Bail;
1333+
return std::make_pair(SwizzleSrc, SwizzleIndices);
1334+
};
1335+
1336+
using ValueEntry = std::pair<SDValue, size_t>;
1337+
SmallVector<ValueEntry, 16> SplatValueCounts;
1338+
1339+
using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
1340+
SmallVector<SwizzleEntry, 16> SwizzleCounts;
1341+
1342+
auto AddCount = [](auto &Counts, const auto &Val) {
1343+
auto CountIt = std::find_if(Counts.begin(), Counts.end(),
1344+
[&Val](auto E) { return E.first == Val; });
1345+
if (CountIt == Counts.end()) {
1346+
Counts.emplace_back(Val, 1);
13151347
} else {
13161348
CountIt->second++;
13171349
}
1350+
};
1351+
1352+
auto GetMostCommon = [](auto &Counts) {
1353+
auto CommonIt =
1354+
std::max_element(Counts.begin(), Counts.end(),
1355+
[](auto A, auto B) { return A.second < B.second; });
1356+
assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
1357+
return *CommonIt;
1358+
};
1359+
1360+
size_t NumConstantLanes = 0;
1361+
1362+
// Count eligible lanes for each type of vector creation op
1363+
for (size_t I = 0; I < Lanes; ++I) {
1364+
const SDValue &Lane = Op->getOperand(I);
1365+
if (Lane.isUndef())
1366+
continue;
1367+
1368+
AddCount(SplatValueCounts, Lane);
1369+
1370+
if (IsConstant(Lane)) {
1371+
NumConstantLanes++;
1372+
} else if (CanSwizzle) {
1373+
auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
1374+
if (SwizzleSrcs.first)
1375+
AddCount(SwizzleCounts, SwizzleSrcs);
1376+
}
13181377
}
1319-
auto CommonIt =
1320-
std::max_element(ValueCounts.begin(), ValueCounts.end(),
1321-
[](Entry A, Entry B) { return A.second < B.second; });
1322-
assert(CommonIt != ValueCounts.end() && "Unexpected all-undef build_vector");
1323-
SDValue SplatValue = CommonIt->first;
1324-
size_t NumCommon = CommonIt->second;
1325-
1326-
// If v128.const is available, consider using it instead of a splat
1378+
1379+
SDValue SplatValue;
1380+
size_t NumSplatLanes;
1381+
std::tie(SplatValue, NumSplatLanes) = GetMostCommon(SplatValueCounts);
1382+
1383+
SDValue SwizzleSrc;
1384+
SDValue SwizzleIndices;
1385+
size_t NumSwizzleLanes = 0;
1386+
if (SwizzleCounts.size())
1387+
std::forward_as_tuple(std::tie(SwizzleSrc, SwizzleIndices),
1388+
NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
1389+
1390+
// Predicate returning true if the lane is properly initialized by the
1391+
// original instruction
1392+
std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
1393+
SDValue Result;
13271394
if (Subtarget->hasUnimplementedSIMD128()) {
1328-
// {i32,i64,f32,f64}.const opcode, and value
1329-
const size_t ConstBytes = 1 + std::max(size_t(4), 16 / Lanes);
1330-
// SIMD prefix and opcode
1331-
const size_t SplatBytes = 2;
1332-
const size_t SplatConstBytes = SplatBytes + ConstBytes;
1333-
// SIMD prefix, opcode, and lane index
1334-
const size_t ReplaceBytes = 3;
1335-
const size_t ReplaceConstBytes = ReplaceBytes + ConstBytes;
1336-
// SIMD prefix, v128.const opcode, and 128-bit value
1337-
const size_t VecConstBytes = 18;
1338-
// Initial v128.const and a replace_lane for each non-const operand
1339-
const size_t ConstInitBytes = VecConstBytes + NumDynamic * ReplaceBytes;
1340-
// Initial splat and all necessary replace_lanes
1341-
const size_t SplatInitBytes =
1342-
IsConstant(SplatValue)
1343-
// Initial constant splat
1344-
? (SplatConstBytes +
1345-
// Constant replace_lanes
1346-
(NumConst - NumCommon) * ReplaceConstBytes +
1347-
// Dynamic replace_lanes
1348-
(NumDynamic * ReplaceBytes))
1349-
// Initial dynamic splat
1350-
: (SplatBytes +
1351-
// Constant replace_lanes
1352-
(NumConst * ReplaceConstBytes) +
1353-
// Dynamic replace_lanes
1354-
(NumDynamic - NumCommon) * ReplaceBytes);
1355-
if (ConstInitBytes < SplatInitBytes) {
1356-
// Create build_vector that will lower to initial v128.const
1395+
// Prefer swizzles over vector consts over splats
1396+
if (NumSwizzleLanes >= NumSplatLanes &&
1397+
NumSwizzleLanes >= NumConstantLanes) {
1398+
Result = DAG.getNode(WebAssemblyISD::SWIZZLE, DL, VecT, SwizzleSrc,
1399+
SwizzleIndices);
1400+
auto Swizzled = std::make_pair(SwizzleSrc, SwizzleIndices);
1401+
IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
1402+
return Swizzled == GetSwizzleSrcs(I, Lane);
1403+
};
1404+
} else if (NumConstantLanes >= NumSplatLanes) {
13571405
SmallVector<SDValue, 16> ConstLanes;
13581406
for (const SDValue &Lane : Op->op_values()) {
13591407
if (IsConstant(Lane)) {
@@ -1364,35 +1412,35 @@ SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
13641412
ConstLanes.push_back(DAG.getConstant(0, DL, LaneT));
13651413
}
13661414
}
1367-
SDValue Result = DAG.getBuildVector(VecT, DL, ConstLanes);
1368-
// Add replace_lane instructions for non-const lanes
1369-
for (size_t I = 0; I < Lanes; ++I) {
1370-
const SDValue &Lane = Op->getOperand(I);
1371-
if (!Lane.isUndef() && !IsConstant(Lane))
1372-
Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
1373-
DAG.getConstant(I, DL, MVT::i32));
1374-
}
1375-
return Result;
1415+
Result = DAG.getBuildVector(VecT, DL, ConstLanes);
1416+
IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
1417+
return IsConstant(Lane);
1418+
};
13761419
}
13771420
}
1378-
// Use a splat for the initial vector
1379-
SDValue Result;
1380-
// Possibly a load_splat
1381-
LoadSDNode *SplattedLoad;
1382-
if (Subtarget->hasUnimplementedSIMD128() &&
1383-
(SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
1384-
SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) {
1385-
Result = DAG.getNode(WebAssemblyISD::LOAD_SPLAT, DL, VecT, SplatValue);
1386-
} else {
1387-
Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
1421+
if (!Result) {
1422+
// Use a splat, but possibly a load_splat
1423+
LoadSDNode *SplattedLoad;
1424+
if (Subtarget->hasUnimplementedSIMD128() &&
1425+
(SplattedLoad = dyn_cast<LoadSDNode>(SplatValue)) &&
1426+
SplattedLoad->getMemoryVT() == VecT.getVectorElementType()) {
1427+
Result = DAG.getNode(WebAssemblyISD::LOAD_SPLAT, DL, VecT, SplatValue);
1428+
} else {
1429+
Result = DAG.getSplatBuildVector(VecT, DL, SplatValue);
1430+
}
1431+
IsLaneConstructed = [&](size_t _, const SDValue &Lane) {
1432+
return Lane == SplatValue;
1433+
};
13881434
}
1389-
// Add replace_lane instructions for other values
1435+
1436+
// Add replace_lane instructions for any unhandled values
13901437
for (size_t I = 0; I < Lanes; ++I) {
13911438
const SDValue &Lane = Op->getOperand(I);
1392-
if (Lane != SplatValue)
1439+
if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
13931440
Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecT, Result, Lane,
13941441
DAG.getConstant(I, DL, MVT::i32));
13951442
}
1443+
13961444
return Result;
13971445
}
13981446

‎llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td

+9
Original file line number | Diff line number | Diff line change
@@ -275,6 +275,15 @@ def : Pat<(vec_t (wasm_shuffle (vec_t V128:$x), (vec_t V128:$y),
275275
(i32 LaneIdx32:$mE), (i32 LaneIdx32:$mF)))>;
276276
}
277277

278+
// Swizzle lanes: v8x16.swizzle
279+
def wasm_swizzle_t : SDTypeProfile<1, 2, []>;
280+
def wasm_swizzle : SDNode<"WebAssemblyISD::SWIZZLE", wasm_swizzle_t>;
281+
defm SWIZZLE :
282+
SIMD_I<(outs V128:$dst), (ins V128:$src, V128:$mask), (outs), (ins),
283+
[(set (v16i8 V128:$dst),
284+
(wasm_swizzle (v16i8 V128:$src), (v16i8 V128:$mask)))],
285+
"v8x16.swizzle\t$dst, $src, $mask", "v8x16.swizzle", 192>;
286+
278287
// Create vector with identical lanes: splat
279288
def splat2 : PatFrag<(ops node:$x), (build_vector node:$x, node:$x)>;
280289
def splat4 : PatFrag<(ops node:$x), (build_vector

‎llvm/test/CodeGen/WebAssembly/simd-build-vector.ll

+165-22
Original file line number | Diff line number | Diff line change
@@ -7,26 +7,25 @@
77
target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
88
target triple = "wasm32-unknown-unknown"
99

10-
; CHECK-LABEL: same_const_one_replaced_i8x16:
11-
; CHECK-NEXT: .functype same_const_one_replaced_i8x16 (i32) -> (v128)
12-
; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 42
13-
; CHECK-NEXT: i16x8.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
14-
; CHECK-NEXT: i16x8.replace_lane $push[[L2:[0-9]+]]=, $pop[[L1]], 5, $0
15-
; CHECK-NEXT: return $pop[[L2]]
16-
define <8 x i16> @same_const_one_replaced_i8x16(i16 %x) {
10+
; CHECK-LABEL: same_const_one_replaced_i16x8:
11+
; CHECK-NEXT: .functype same_const_one_replaced_i16x8 (i32) -> (v128)
12+
; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 42, 42, 42, 42, 42, 0, 42, 42
13+
; CHECK-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
14+
; CHECK-NEXT: return $pop[[L1]]
15+
define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) {
1716
%v = insertelement
1817
<8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>,
1918
i16 %x,
2019
i32 5
2120
ret <8 x i16> %v
2221
}
2322

24-
; CHECK-LABEL: different_const_one_replaced_i8x16:
25-
; CHECK-NEXT: .functype different_const_one_replaced_i8x16 (i32) -> (v128)
23+
; CHECK-LABEL: different_const_one_replaced_i16x8:
24+
; CHECK-NEXT: .functype different_const_one_replaced_i16x8 (i32) -> (v128)
2625
; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 1, -2, 3, -4, 5, 0, 7, -8
2726
; CHECK-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
2827
; CHECK-NEXT: return $pop[[L1]]
29-
define <8 x i16> @different_const_one_replaced_i8x16(i16 %x) {
28+
define <8 x i16> @different_const_one_replaced_i16x8(i16 %x) {
3029
%v = insertelement
3130
<8 x i16> <i16 1, i16 -2, i16 3, i16 -4, i16 5, i16 -6, i16 7, i16 -8>,
3231
i16 %x,
@@ -36,10 +35,9 @@ define <8 x i16> @different_const_one_replaced_i8x16(i16 %x) {
3635

3736
; CHECK-LABEL: same_const_one_replaced_f32x4:
3837
; CHECK-NEXT: .functype same_const_one_replaced_f32x4 (f32) -> (v128)
39-
; CHECK-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.5p5
40-
; CHECK-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
41-
; CHECK-NEXT: f32x4.replace_lane $push[[L2:[0-9]+]]=, $pop[[L1]], 2, $0
42-
; CHECK-NEXT: return $pop[[L2]]
38+
; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.5p5, 0x1.5p5, 0x0p0, 0x1.5p5
39+
; CHECK-NEXT: f32x4.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 2, $0
40+
; CHECK-NEXT: return $pop[[L1]]
4341
define <4 x float> @same_const_one_replaced_f32x4(float %x) {
4442
%v = insertelement
4543
<4 x float> <float 42., float 42., float 42., float 42.>,
@@ -63,11 +61,8 @@ define <4 x float> @different_const_one_replaced_f32x4(float %x) {
6361

6462
; CHECK-LABEL: splat_common_const_i32x4:
6563
; CHECK-NEXT: .functype splat_common_const_i32x4 () -> (v128)
66-
; CHECK-NEXT: i32.const $push[[L0:[0-9]+]]=, 3
67-
; CHECK-NEXT: i32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
68-
; CHECK-NEXT: i32.const $push[[L2:[0-9]+]]=, 1
69-
; CHECK-NEXT: i32x4.replace_lane $push[[L3:[0-9]+]]=, $pop[[L1]], 3, $pop[[L2]]
70-
; CHECK-NEXT: return $pop[[L3]]
64+
; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 0, 3, 3, 1
65+
; CHECK-NEXT: return $pop[[L0]]
7166
define <4 x i32> @splat_common_const_i32x4() {
7267
ret <4 x i32> <i32 undef, i32 3, i32 3, i32 1>
7368
}
@@ -92,11 +87,159 @@ define <8 x i16> @splat_common_arg_i16x8(i16 %a, i16 %b, i16 %c) {
9287
ret <8 x i16> %v7
9388
}
9489

90+
; CHECK-LABEL: swizzle_one_i8x16:
91+
; CHECK-NEXT: .functype swizzle_one_i8x16 (v128, v128) -> (v128)
92+
; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1
93+
; CHECK-NEXT: return $pop[[L0]]
94+
define <16 x i8> @swizzle_one_i8x16(<16 x i8> %src, <16 x i8> %mask) {
95+
%m0 = extractelement <16 x i8> %mask, i32 0
96+
%s0 = extractelement <16 x i8> %src, i8 %m0
97+
%v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
98+
ret <16 x i8> %v0
99+
}
100+
101+
; CHECK-LABEL: swizzle_all_i8x16:
102+
; CHECK-NEXT: .functype swizzle_all_i8x16 (v128, v128) -> (v128)
103+
; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1
104+
; CHECK-NEXT: return $pop[[L0]]
105+
define <16 x i8> @swizzle_all_i8x16(<16 x i8> %src, <16 x i8> %mask) {
106+
%m0 = extractelement <16 x i8> %mask, i32 0
107+
%s0 = extractelement <16 x i8> %src, i8 %m0
108+
%v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
109+
%m1 = extractelement <16 x i8> %mask, i32 1
110+
%s1 = extractelement <16 x i8> %src, i8 %m1
111+
%v1 = insertelement <16 x i8> %v0, i8 %s1, i32 1
112+
%m2 = extractelement <16 x i8> %mask, i32 2
113+
%s2 = extractelement <16 x i8> %src, i8 %m2
114+
%v2 = insertelement <16 x i8> %v1, i8 %s2, i32 2
115+
%m3 = extractelement <16 x i8> %mask, i32 3
116+
%s3 = extractelement <16 x i8> %src, i8 %m3
117+
%v3 = insertelement <16 x i8> %v2, i8 %s3, i32 3
118+
%m4 = extractelement <16 x i8> %mask, i32 4
119+
%s4 = extractelement <16 x i8> %src, i8 %m4
120+
%v4 = insertelement <16 x i8> %v3, i8 %s4, i32 4
121+
%m5 = extractelement <16 x i8> %mask, i32 5
122+
%s5 = extractelement <16 x i8> %src, i8 %m5
123+
%v5 = insertelement <16 x i8> %v4, i8 %s5, i32 5
124+
%m6 = extractelement <16 x i8> %mask, i32 6
125+
%s6 = extractelement <16 x i8> %src, i8 %m6
126+
%v6 = insertelement <16 x i8> %v5, i8 %s6, i32 6
127+
%m7 = extractelement <16 x i8> %mask, i32 7
128+
%s7 = extractelement <16 x i8> %src, i8 %m7
129+
%v7 = insertelement <16 x i8> %v6, i8 %s7, i32 7
130+
%m8 = extractelement <16 x i8> %mask, i32 8
131+
%s8 = extractelement <16 x i8> %src, i8 %m8
132+
%v8 = insertelement <16 x i8> %v7, i8 %s8, i32 8
133+
%m9 = extractelement <16 x i8> %mask, i32 9
134+
%s9 = extractelement <16 x i8> %src, i8 %m9
135+
%v9 = insertelement <16 x i8> %v8, i8 %s9, i32 9
136+
%m10 = extractelement <16 x i8> %mask, i32 10
137+
%s10 = extractelement <16 x i8> %src, i8 %m10
138+
%v10 = insertelement <16 x i8> %v9, i8 %s10, i32 10
139+
%m11 = extractelement <16 x i8> %mask, i32 11
140+
%s11 = extractelement <16 x i8> %src, i8 %m11
141+
%v11 = insertelement <16 x i8> %v10, i8 %s11, i32 11
142+
%m12 = extractelement <16 x i8> %mask, i32 12
143+
%s12 = extractelement <16 x i8> %src, i8 %m12
144+
%v12 = insertelement <16 x i8> %v11, i8 %s12, i32 12
145+
%m13 = extractelement <16 x i8> %mask, i32 13
146+
%s13 = extractelement <16 x i8> %src, i8 %m13
147+
%v13 = insertelement <16 x i8> %v12, i8 %s13, i32 13
148+
%m14 = extractelement <16 x i8> %mask, i32 14
149+
%s14 = extractelement <16 x i8> %src, i8 %m14
150+
%v14 = insertelement <16 x i8> %v13, i8 %s14, i32 14
151+
%m15 = extractelement <16 x i8> %mask, i32 15
152+
%s15 = extractelement <16 x i8> %src, i8 %m15
153+
%v15 = insertelement <16 x i8> %v14, i8 %s15, i32 15
154+
ret <16 x i8> %v15
155+
}
156+
157+
; CHECK-LABEL: swizzle_one_i16x8:
158+
; CHECK-NEXT: .functype swizzle_one_i16x8 (v128, v128) -> (v128)
159+
; CHECK-NOT: swizzle
160+
; CHECK: return
161+
define <8 x i16> @swizzle_one_i16x8(<8 x i16> %src, <8 x i16> %mask) {
162+
%m0 = extractelement <8 x i16> %mask, i32 0
163+
%s0 = extractelement <8 x i16> %src, i16 %m0
164+
%v0 = insertelement <8 x i16> undef, i16 %s0, i32 0
165+
ret <8 x i16> %v0
166+
}
167+
168+
; CHECK-LABEL: mashup_swizzle_i8x16:
169+
; CHECK-NEXT: .functype mashup_swizzle_i8x16 (v128, v128, i32) -> (v128)
170+
; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1
171+
; CHECK: i8x16.replace_lane
172+
; CHECK: i8x16.replace_lane
173+
; CHECK: i8x16.replace_lane
174+
; CHECK: i8x16.replace_lane
175+
; CHECK: return
176+
define <16 x i8> @mashup_swizzle_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
177+
; swizzle 0
178+
%m0 = extractelement <16 x i8> %mask, i32 0
179+
%s0 = extractelement <16 x i8> %src, i8 %m0
180+
%v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
181+
; swizzle 7
182+
%m1 = extractelement <16 x i8> %mask, i32 7
183+
%s1 = extractelement <16 x i8> %src, i8 %m1
184+
%v1 = insertelement <16 x i8> %v0, i8 %s1, i32 7
185+
; splat 3
186+
%v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 3
187+
; splat 12
188+
%v3 = insertelement <16 x i8> %v2, i8 %splatted, i32 12
189+
; const 4
190+
%v4 = insertelement <16 x i8> %v3, i8 42, i32 4
191+
; const 14
192+
%v5 = insertelement <16 x i8> %v4, i8 42, i32 14
193+
ret <16 x i8> %v5
194+
}
195+
196+
; CHECK-LABEL: mashup_const_i8x16:
197+
; CHECK-NEXT: .functype mashup_const_i8x16 (v128, v128, i32) -> (v128)
198+
; CHECK: v128.const $push[[L0:[0-9]+]]=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0
199+
; CHECK: i8x16.replace_lane
200+
; CHECK: i8x16.replace_lane
201+
; CHECK: i8x16.replace_lane
202+
; CHECK: return
203+
define <16 x i8> @mashup_const_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
204+
; swizzle 0
205+
%m0 = extractelement <16 x i8> %mask, i32 0
206+
%s0 = extractelement <16 x i8> %src, i8 %m0
207+
%v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
208+
; splat 3
209+
%v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
210+
; splat 12
211+
%v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
212+
; const 4
213+
%v3 = insertelement <16 x i8> %v2, i8 42, i32 4
214+
; const 14
215+
%v4 = insertelement <16 x i8> %v3, i8 42, i32 14
216+
ret <16 x i8> %v4
217+
}
218+
219+
; CHECK-LABEL: mashup_splat_i8x16:
220+
; CHECK-NEXT: .functype mashup_splat_i8x16 (v128, v128, i32) -> (v128)
221+
; CHECK: i8x16.splat $push[[L0:[0-9]+]]=, $2
222+
; CHECK: i8x16.replace_lane
223+
; CHECK: i8x16.replace_lane
224+
; CHECK: return
225+
define <16 x i8> @mashup_splat_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
226+
; swizzle 0
227+
%m0 = extractelement <16 x i8> %mask, i32 0
228+
%s0 = extractelement <16 x i8> %src, i8 %m0
229+
%v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
230+
; splat 3
231+
%v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
232+
; splat 12
233+
%v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
234+
; const 4
235+
%v3 = insertelement <16 x i8> %v2, i8 42, i32 4
236+
ret <16 x i8> %v3
237+
}
238+
95239
; CHECK-LABEL: undef_const_insert_f32x4:
96240
; CHECK-NEXT: .functype undef_const_insert_f32x4 () -> (v128)
97-
; CHECK-NEXT: f32.const $push[[L0:[0-9]+]]=, 0x1.5p5
98-
; CHECK-NEXT: f32x4.splat $push[[L1:[0-9]+]]=, $pop[[L0]]
99-
; CHECK-NEXT: return $pop[[L1]]
241+
; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x0p0, 0x1.5p5, 0x0p0, 0x0p0
242+
; CHECK-NEXT: return $pop[[L0]]
100243
define <4 x float> @undef_const_insert_f32x4() {
101244
%v = insertelement <4 x float> undef, float 42., i32 1
102245
ret <4 x float> %v

‎llvm/test/MC/WebAssembly/simd-encodings.s

+3
Original file line number | Diff line number | Diff line change
@@ -463,6 +463,9 @@ main:
463463
# CHECK: f64x2.convert_i64x2_u # encoding: [0xfd,0xb2,0x01]
464464
f64x2.convert_i64x2_u
465465

466+
# CHECK: v8x16.swizzle # encoding: [0xfd,0xc0,0x01]
467+
v8x16.swizzle
468+
466469
# CHECK: v8x16.load_splat 48 # encoding: [0xfd,0xc2,0x01,0x00,0x30]
467470
v8x16.load_splat 48
468471

0 commit comments

Comments
 (0)
Please sign in to comment.