Skip to content

Commit 5d403f6

Browse files
committed Dec 21, 2018
[X86][SSE] Auto upgrade PADDS/PSUBS intrinsics to SADD_SAT/SSUB_SAT generic intrinsics (llvm)
This auto upgrades the signed SSE saturated math intrinsics to SADD_SAT/SSUB_SAT generic intrinsics.

Clang counterpart: https://reviews.llvm.org/D55890

Differential Revision: https://reviews.llvm.org/D55894

llvm-svn: 349892
1 parent: f069f1c; commit: 5d403f6

12 files changed: +1072 -1231 lines changed
 

‎llvm/include/llvm/IR/IntrinsicsX86.td

Lines changed: 0 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -364,18 +364,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
364364

365365
// Integer arithmetic ops.
366366
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
367-
def int_x86_sse2_padds_b :
368-
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
369-
llvm_v16i8_ty], [IntrNoMem, Commutative]>;
370-
def int_x86_sse2_padds_w :
371-
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
372-
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
373-
def int_x86_sse2_psubs_b :
374-
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty,
375-
llvm_v16i8_ty], [IntrNoMem]>;
376-
def int_x86_sse2_psubs_w :
377-
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
378-
llvm_v8i16_ty], [IntrNoMem]>;
379367
def int_x86_sse2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw128">,
380368
Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
381369
llvm_v8i16_ty], [IntrNoMem, Commutative]>;
@@ -1346,18 +1334,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
13461334

13471335
// Integer arithmetic ops.
13481336
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
1349-
def int_x86_avx2_padds_b :
1350-
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
1351-
llvm_v32i8_ty], [IntrNoMem, Commutative]>;
1352-
def int_x86_avx2_padds_w :
1353-
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1354-
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
1355-
def int_x86_avx2_psubs_b :
1356-
Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty,
1357-
llvm_v32i8_ty], [IntrNoMem]>;
1358-
def int_x86_avx2_psubs_w :
1359-
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
1360-
llvm_v16i16_ty], [IntrNoMem]>;
13611337
def int_x86_avx2_pmulhu_w : GCCBuiltin<"__builtin_ia32_pmulhuw256">,
13621338
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
13631339
llvm_v16i16_ty], [IntrNoMem, Commutative]>;
@@ -3574,18 +3550,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
35743550
}
35753551
// Integer arithmetic ops
35763552
let TargetPrefix = "x86" in {
3577-
def int_x86_avx512_padds_b_512 :
3578-
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
3579-
[IntrNoMem]>;
3580-
def int_x86_avx512_padds_w_512 :
3581-
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty],
3582-
[IntrNoMem]>;
3583-
def int_x86_avx512_psubs_b_512 :
3584-
Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
3585-
[IntrNoMem]>;
3586-
def int_x86_avx512_psubs_w_512 :
3587-
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty, llvm_v32i16_ty],
3588-
[IntrNoMem]>;
35893553
def int_x86_avx512_pmulhu_w_512 : GCCBuiltin<"__builtin_ia32_pmulhuw512">,
35903554
Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
35913555
llvm_v32i16_ty], [IntrNoMem, Commutative]>;

‎llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 23 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,18 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
7777
Name == "addcarry.u64" || // Added in 8.0
7878
Name == "subborrow.u32" || // Added in 8.0
7979
Name == "subborrow.u64" || // Added in 8.0
80+
Name.startswith("sse2.padds.") || // Added in 8.0
81+
Name.startswith("sse2.psubs.") || // Added in 8.0
8082
Name.startswith("sse2.paddus.") || // Added in 8.0
8183
Name.startswith("sse2.psubus.") || // Added in 8.0
84+
Name.startswith("avx2.padds.") || // Added in 8.0
85+
Name.startswith("avx2.psubs.") || // Added in 8.0
8286
Name.startswith("avx2.paddus.") || // Added in 8.0
8387
Name.startswith("avx2.psubus.") || // Added in 8.0
88+
Name.startswith("avx512.padds.") || // Added in 8.0
89+
Name.startswith("avx512.psubs.") || // Added in 8.0
90+
Name.startswith("avx512.mask.padds.") || // Added in 8.0
91+
Name.startswith("avx512.mask.psubs.") || // Added in 8.0
8492
Name.startswith("avx512.mask.paddus.") || // Added in 8.0
8593
Name.startswith("avx512.mask.psubus.") || // Added in 8.0
8694
Name=="ssse3.pabs.b.128" || // Added in 6.0
@@ -284,8 +292,6 @@ static bool ShouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
284292
Name.startswith("avx512.mask.max.p") || // Added in 7.0. 128/256 in 5.0
285293
Name.startswith("avx512.mask.min.p") || // Added in 7.0. 128/256 in 5.0
286294
Name.startswith("avx512.mask.fpclass.p") || // Added in 7.0
287-
Name.startswith("avx512.mask.padds.") || // Added in 8.0
288-
Name.startswith("avx512.mask.psubs.") || // Added in 8.0
289295
Name == "sse.cvtsi2ss" || // Added in 7.0
290296
Name == "sse.cvtsi642ss" || // Added in 7.0
291297
Name == "sse2.cvtsi2sd" || // Added in 7.0
@@ -928,12 +934,14 @@ static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
928934
}
929935

930936
static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
931-
bool IsAddition) {
937+
bool IsSigned, bool IsAddition) {
932938
Type *Ty = CI.getType();
933939
Value *Op0 = CI.getOperand(0);
934940
Value *Op1 = CI.getOperand(1);
935941

936-
Intrinsic::ID IID = IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat;
942+
Intrinsic::ID IID =
943+
IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
944+
: (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
937945
Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
938946
Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
939947

@@ -1380,36 +1388,6 @@ static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
13801388
IID = Intrinsic::x86_avx512_vpshrd_w_512;
13811389
else
13821390
llvm_unreachable("Unexpected intrinsic");
1383-
} else if (Name.startswith("padds.")) {
1384-
if (VecWidth == 128 && EltWidth == 8)
1385-
IID = Intrinsic::x86_sse2_padds_b;
1386-
else if (VecWidth == 256 && EltWidth == 8)
1387-
IID = Intrinsic::x86_avx2_padds_b;
1388-
else if (VecWidth == 512 && EltWidth == 8)
1389-
IID = Intrinsic::x86_avx512_padds_b_512;
1390-
else if (VecWidth == 128 && EltWidth == 16)
1391-
IID = Intrinsic::x86_sse2_padds_w;
1392-
else if (VecWidth == 256 && EltWidth == 16)
1393-
IID = Intrinsic::x86_avx2_padds_w;
1394-
else if (VecWidth == 512 && EltWidth == 16)
1395-
IID = Intrinsic::x86_avx512_padds_w_512;
1396-
else
1397-
llvm_unreachable("Unexpected intrinsic");
1398-
} else if (Name.startswith("psubs.")) {
1399-
if (VecWidth == 128 && EltWidth == 8)
1400-
IID = Intrinsic::x86_sse2_psubs_b;
1401-
else if (VecWidth == 256 && EltWidth == 8)
1402-
IID = Intrinsic::x86_avx2_psubs_b;
1403-
else if (VecWidth == 512 && EltWidth == 8)
1404-
IID = Intrinsic::x86_avx512_psubs_b_512;
1405-
else if (VecWidth == 128 && EltWidth == 16)
1406-
IID = Intrinsic::x86_sse2_psubs_w;
1407-
else if (VecWidth == 256 && EltWidth == 16)
1408-
IID = Intrinsic::x86_avx2_psubs_w;
1409-
else if (VecWidth == 512 && EltWidth == 16)
1410-
IID = Intrinsic::x86_avx512_psubs_w_512;
1411-
else
1412-
llvm_unreachable("Unexpected intrinsic");
14131391
} else
14141392
return false;
14151393

@@ -2093,14 +2071,24 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
20932071
if (CI->getNumArgOperands() == 3)
20942072
Rep = EmitX86Select(Builder, CI->getArgOperand(2), Rep,
20952073
CI->getArgOperand(1));
2074+
} else if (IsX86 && (Name.startswith("sse2.padds.") ||
2075+
Name.startswith("sse2.psubs.") ||
2076+
Name.startswith("avx2.padds.") ||
2077+
Name.startswith("avx2.psubs.") ||
2078+
Name.startswith("avx512.padds.") ||
2079+
Name.startswith("avx512.psubs.") ||
2080+
Name.startswith("avx512.mask.padds.") ||
2081+
Name.startswith("avx512.mask.psubs."))) {
2082+
bool IsAdd = Name.contains(".padds");
2083+
Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, true, IsAdd);
20962084
} else if (IsX86 && (Name.startswith("sse2.paddus.") ||
20972085
Name.startswith("sse2.psubus.") ||
20982086
Name.startswith("avx2.paddus.") ||
20992087
Name.startswith("avx2.psubus.") ||
21002088
Name.startswith("avx512.mask.paddus.") ||
21012089
Name.startswith("avx512.mask.psubus."))) {
21022090
bool IsAdd = Name.contains(".paddus");
2103-
Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, IsAdd);
2091+
Rep = UpgradeX86AddSubSatIntrinsics(Builder, *CI, false, IsAdd);
21042092
} else if (IsX86 && Name.startswith("avx512.mask.palignr.")) {
21052093
Rep = UpgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
21062094
CI->getArgOperand(1),

‎llvm/lib/Target/X86/X86IntrinsicsInfo.h

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -319,8 +319,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
319319
X86_INTRINSIC_DATA(avx2_packsswb, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
320320
X86_INTRINSIC_DATA(avx2_packusdw, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
321321
X86_INTRINSIC_DATA(avx2_packuswb, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
322-
X86_INTRINSIC_DATA(avx2_padds_b, INTR_TYPE_2OP, ISD::SADDSAT, 0),
323-
X86_INTRINSIC_DATA(avx2_padds_w, INTR_TYPE_2OP, ISD::SADDSAT, 0),
324322
X86_INTRINSIC_DATA(avx2_permd, VPERM_2OP, X86ISD::VPERMV, 0),
325323
X86_INTRINSIC_DATA(avx2_permps, VPERM_2OP, X86ISD::VPERMV, 0),
326324
X86_INTRINSIC_DATA(avx2_phadd_d, INTR_TYPE_2OP, X86ISD::HADD, 0),
@@ -361,8 +359,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
361359
X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),
362360
X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),
363361
X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),
364-
X86_INTRINSIC_DATA(avx2_psubs_b, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
365-
X86_INTRINSIC_DATA(avx2_psubs_w, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
366362
X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
367363
X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
368364
X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
@@ -920,8 +916,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
920916
X86_INTRINSIC_DATA(avx512_packsswb_512, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
921917
X86_INTRINSIC_DATA(avx512_packusdw_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
922918
X86_INTRINSIC_DATA(avx512_packuswb_512, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
923-
X86_INTRINSIC_DATA(avx512_padds_b_512, INTR_TYPE_2OP, ISD::SADDSAT, 0),
924-
X86_INTRINSIC_DATA(avx512_padds_w_512, INTR_TYPE_2OP, ISD::SADDSAT, 0),
925919
X86_INTRINSIC_DATA(avx512_permvar_df_256, VPERM_2OP, X86ISD::VPERMV, 0),
926920
X86_INTRINSIC_DATA(avx512_permvar_df_512, VPERM_2OP, X86ISD::VPERMV, 0),
927921
X86_INTRINSIC_DATA(avx512_permvar_di_256, VPERM_2OP, X86ISD::VPERMV, 0),
@@ -980,8 +974,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
980974
X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0),
981975
X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0),
982976
X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0),
983-
X86_INTRINSIC_DATA(avx512_psubs_b_512, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
984-
X86_INTRINSIC_DATA(avx512_psubs_w_512, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
985977
X86_INTRINSIC_DATA(avx512_pternlog_d_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
986978
X86_INTRINSIC_DATA(avx512_pternlog_d_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
987979
X86_INTRINSIC_DATA(avx512_pternlog_d_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
@@ -1144,8 +1136,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
11441136
X86_INTRINSIC_DATA(sse2_packssdw_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
11451137
X86_INTRINSIC_DATA(sse2_packsswb_128, INTR_TYPE_2OP, X86ISD::PACKSS, 0),
11461138
X86_INTRINSIC_DATA(sse2_packuswb_128, INTR_TYPE_2OP, X86ISD::PACKUS, 0),
1147-
X86_INTRINSIC_DATA(sse2_padds_b, INTR_TYPE_2OP, ISD::SADDSAT, 0),
1148-
X86_INTRINSIC_DATA(sse2_padds_w, INTR_TYPE_2OP, ISD::SADDSAT, 0),
11491139
X86_INTRINSIC_DATA(sse2_pmadd_wd, INTR_TYPE_2OP, X86ISD::VPMADDWD, 0),
11501140
X86_INTRINSIC_DATA(sse2_pmovmskb_128, INTR_TYPE_1OP, X86ISD::MOVMSK, 0),
11511141
X86_INTRINSIC_DATA(sse2_pmulh_w, INTR_TYPE_2OP, ISD::MULHS, 0),
@@ -1167,8 +1157,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
11671157
X86_INTRINSIC_DATA(sse2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
11681158
X86_INTRINSIC_DATA(sse2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
11691159
X86_INTRINSIC_DATA(sse2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
1170-
X86_INTRINSIC_DATA(sse2_psubs_b, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
1171-
X86_INTRINSIC_DATA(sse2_psubs_w, INTR_TYPE_2OP, ISD::SSUBSAT, 0),
11721160
X86_INTRINSIC_DATA(sse2_ucomieq_sd, COMI, X86ISD::UCOMI, ISD::SETEQ),
11731161
X86_INTRINSIC_DATA(sse2_ucomige_sd, COMI, X86ISD::UCOMI, ISD::SETGE),
11741162
X86_INTRINSIC_DATA(sse2_ucomigt_sd, COMI, X86ISD::UCOMI, ISD::SETGT),

‎llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 0 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -248,67 +248,6 @@ Instruction *InstCombiner::SimplifyAnyMemSet(AnyMemSetInst *MI) {
248248
return nullptr;
249249
}
250250

251-
static Value *simplifyX86AddsSubs(const IntrinsicInst &II,
252-
InstCombiner::BuilderTy &Builder) {
253-
bool IsAddition;
254-
255-
switch (II.getIntrinsicID()) {
256-
default: llvm_unreachable("Unexpected intrinsic!");
257-
case Intrinsic::x86_sse2_padds_b:
258-
case Intrinsic::x86_sse2_padds_w:
259-
case Intrinsic::x86_avx2_padds_b:
260-
case Intrinsic::x86_avx2_padds_w:
261-
case Intrinsic::x86_avx512_padds_b_512:
262-
case Intrinsic::x86_avx512_padds_w_512:
263-
IsAddition = true;
264-
break;
265-
case Intrinsic::x86_sse2_psubs_b:
266-
case Intrinsic::x86_sse2_psubs_w:
267-
case Intrinsic::x86_avx2_psubs_b:
268-
case Intrinsic::x86_avx2_psubs_w:
269-
case Intrinsic::x86_avx512_psubs_b_512:
270-
case Intrinsic::x86_avx512_psubs_w_512:
271-
IsAddition = false;
272-
break;
273-
}
274-
275-
auto *Arg0 = dyn_cast<Constant>(II.getOperand(0));
276-
auto *Arg1 = dyn_cast<Constant>(II.getOperand(1));
277-
auto VT = cast<VectorType>(II.getType());
278-
auto SVT = VT->getElementType();
279-
unsigned NumElems = VT->getNumElements();
280-
281-
if (!Arg0 || !Arg1)
282-
return nullptr;
283-
284-
SmallVector<Constant *, 64> Result;
285-
286-
APInt MaxValue = APInt::getSignedMaxValue(SVT->getIntegerBitWidth());
287-
APInt MinValue = APInt::getSignedMinValue(SVT->getIntegerBitWidth());
288-
for (unsigned i = 0; i < NumElems; ++i) {
289-
auto *Elt0 = Arg0->getAggregateElement(i);
290-
auto *Elt1 = Arg1->getAggregateElement(i);
291-
if (isa<UndefValue>(Elt0) || isa<UndefValue>(Elt1)) {
292-
Result.push_back(UndefValue::get(SVT));
293-
continue;
294-
}
295-
296-
if (!isa<ConstantInt>(Elt0) || !isa<ConstantInt>(Elt1))
297-
return nullptr;
298-
299-
const APInt &Val0 = cast<ConstantInt>(Elt0)->getValue();
300-
const APInt &Val1 = cast<ConstantInt>(Elt1)->getValue();
301-
bool Overflow = false;
302-
APInt ResultElem = IsAddition ? Val0.sadd_ov(Val1, Overflow)
303-
: Val0.ssub_ov(Val1, Overflow);
304-
if (Overflow)
305-
ResultElem = Val0.isNegative() ? MinValue : MaxValue;
306-
Result.push_back(Constant::getIntegerValue(SVT, ResultElem));
307-
}
308-
309-
return ConstantVector::get(Result);
310-
}
311-
312251
static Value *simplifyX86immShift(const IntrinsicInst &II,
313252
InstCombiner::BuilderTy &Builder) {
314253
bool LogicalShift = false;
@@ -2789,23 +2728,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
27892728
break;
27902729
}
27912730

2792-
// Constant fold add/sub with saturation intrinsics.
2793-
case Intrinsic::x86_sse2_padds_b:
2794-
case Intrinsic::x86_sse2_padds_w:
2795-
case Intrinsic::x86_sse2_psubs_b:
2796-
case Intrinsic::x86_sse2_psubs_w:
2797-
case Intrinsic::x86_avx2_padds_b:
2798-
case Intrinsic::x86_avx2_padds_w:
2799-
case Intrinsic::x86_avx2_psubs_b:
2800-
case Intrinsic::x86_avx2_psubs_w:
2801-
case Intrinsic::x86_avx512_padds_b_512:
2802-
case Intrinsic::x86_avx512_padds_w_512:
2803-
case Intrinsic::x86_avx512_psubs_b_512:
2804-
case Intrinsic::x86_avx512_psubs_w_512:
2805-
if (Value *V = simplifyX86AddsSubs(*II, Builder))
2806-
return replaceInstUsesWith(*II, V);
2807-
break;
2808-
28092731
// Constant fold ashr( <A x Bi>, Ci ).
28102732
// Constant fold lshr( <A x Bi>, Ci ).
28112733
// Constant fold shl( <A x Bi>, Ci ).

‎llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -614,6 +614,28 @@ define <4 x i64> @test_x86_avx2_pmul_dq(<8 x i32> %a0, <8 x i32> %a1) {
614614
declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
615615

616616

617+
define <32 x i8> @test_x86_avx2_padds_b(<32 x i8> %a0, <32 x i8> %a1) {
618+
; CHECK-LABEL: test_x86_avx2_padds_b:
619+
; CHECK: ## %bb.0:
620+
; CHECK-NEXT: vpaddsb %ymm1, %ymm0, %ymm0
621+
; CHECK-NEXT: ret{{[l|q]}}
622+
%res = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
623+
ret <32 x i8> %res
624+
}
625+
declare <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone
626+
627+
628+
define <16 x i16> @test_x86_avx2_padds_w(<16 x i16> %a0, <16 x i16> %a1) {
629+
; CHECK-LABEL: test_x86_avx2_padds_w:
630+
; CHECK: ## %bb.0:
631+
; CHECK-NEXT: vpaddsw %ymm1, %ymm0, %ymm0
632+
; CHECK-NEXT: ret{{[l|q]}}
633+
%res = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
634+
ret <16 x i16> %res
635+
}
636+
declare <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone
637+
638+
617639
define <32 x i8> @test_x86_avx2_paddus_b(<32 x i8> %a0, <32 x i8> %a1) {
618640
; CHECK-LABEL: test_x86_avx2_paddus_b:
619641
; CHECK: ## %bb.0:
@@ -636,6 +658,28 @@ define <16 x i16> @test_x86_avx2_paddus_w(<16 x i16> %a0, <16 x i16> %a1) {
636658
declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone
637659

638660

661+
define <32 x i8> @test_x86_avx2_psubs_b(<32 x i8> %a0, <32 x i8> %a1) {
662+
; CHECK-LABEL: test_x86_avx2_psubs_b:
663+
; CHECK: ## %bb.0:
664+
; CHECK-NEXT: vpsubsb %ymm1, %ymm0, %ymm0
665+
; CHECK-NEXT: ret{{[l|q]}}
666+
%res = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
667+
ret <32 x i8> %res
668+
}
669+
declare <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone
670+
671+
672+
define <16 x i16> @test_x86_avx2_psubs_w(<16 x i16> %a0, <16 x i16> %a1) {
673+
; CHECK-LABEL: test_x86_avx2_psubs_w:
674+
; CHECK: ## %bb.0:
675+
; CHECK-NEXT: vpsubsw %ymm1, %ymm0, %ymm0
676+
; CHECK-NEXT: ret{{[l|q]}}
677+
%res = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
678+
ret <16 x i16> %res
679+
}
680+
declare <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone
681+
682+
639683
define <32 x i8> @test_x86_avx2_psubus_b(<32 x i8> %a0, <32 x i8> %a1) {
640684
; CHECK-LABEL: test_x86_avx2_psubus_b:
641685
; CHECK: ## %bb.0:

0 commit comments

Comments
 (0)