Skip to content

Commit 1ada137

Browse files
committedAug 20, 2019
[X86] Add back the -x86-experimental-vector-widening-legalization command line flag and all associated code, but leave it enabled by default
Google is reporting performance issues with the new default behavior and has asked for a way to switch back to the old behavior while we investigate and make fixes. I've restored all of the code that had since been removed and added additional checks of the command line flag to code paths that are not otherwise guarded by a check of getTypeAction. I've also modified the cost model tables to hopefully get us back to the previous costs. Hopefully we won't need to support this for very long, since we have no test coverage of the old behavior and can therefore very easily break it. llvm-svn: 369332
1 parent 12cbbab commit 1ada137

File tree

2 files changed

+1227
-143
lines changed

2 files changed

+1227
-143
lines changed
 

‎llvm/lib/Target/X86/X86ISelLowering.cpp

+1,175-130
Large diffs are not rendered by default.

‎llvm/lib/Target/X86/X86TargetTransformInfo.cpp

+52-13
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ using namespace llvm;
5050

5151
#define DEBUG_TYPE "x86tti"
5252

53+
extern cl::opt<bool> ExperimentalVectorWideningLegalization;
54+
5355
//===----------------------------------------------------------------------===//
5456
//
5557
// X86 cost model.
@@ -918,7 +920,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
918920
// FIXME: We can use permq for 64-bit or larger extracts from 256-bit
919921
// vectors.
920922
int OrigSubElts = SubTp->getVectorNumElements();
921-
if (NumSubElts > OrigSubElts &&
923+
if (ExperimentalVectorWideningLegalization &&
924+
NumSubElts > OrigSubElts &&
922925
(Index % OrigSubElts) == 0 && (NumSubElts % OrigSubElts) == 0 &&
923926
LT.second.getVectorElementType() ==
924927
SubLT.second.getVectorElementType() &&
@@ -1330,6 +1333,12 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
13301333
// TODO: For AVX512DQ + AVX512VL, we also have cheap casts for 128-bit and
13311334
// 256-bit wide vectors.
13321335

1336+
// Used with widening legalization
1337+
static const TypeConversionCostTblEntry AVX512FConversionTblWide[] = {
1338+
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 1 },
1339+
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 1 },
1340+
};
1341+
13331342
static const TypeConversionCostTblEntry AVX512FConversionTbl[] = {
13341343
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 },
13351344
{ ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 },
@@ -1347,8 +1356,6 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
13471356
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 },
13481357
{ ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
13491358
{ ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 },
1350-
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 1 },
1351-
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 1 },
13521359
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 1 },
13531360
{ ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 1 },
13541361
{ ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 },
@@ -1401,19 +1408,28 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
14011408
{ ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f32, 2 },
14021409
};
14031410

1411+
static const TypeConversionCostTblEntry AVX2ConversionTblWide[] = {
1412+
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 1 },
1413+
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 1 },
1414+
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 1 },
1415+
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 1 },
1416+
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 1 },
1417+
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 1 },
1418+
};
1419+
14041420
static const TypeConversionCostTblEntry AVX2ConversionTbl[] = {
14051421
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 3 },
14061422
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 3 },
14071423
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
14081424
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 3 },
1409-
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 1 },
1410-
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 1 },
1411-
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 1 },
1412-
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 1 },
1425+
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 3 },
1426+
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 3 },
1427+
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
1428+
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
14131429
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
14141430
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 },
1415-
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 1 },
1416-
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 1 },
1431+
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
1432+
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
14171433
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
14181434
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 },
14191435
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 },
@@ -1432,18 +1448,24 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
14321448
{ ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 },
14331449
};
14341450

1451+
static const TypeConversionCostTblEntry AVXConversionTblWide[] = {
1452+
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
1453+
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
1454+
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 4 },
1455+
};
1456+
14351457
static const TypeConversionCostTblEntry AVXConversionTbl[] = {
14361458
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 6 },
14371459
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 4 },
14381460
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 },
14391461
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 },
1440-
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
1462+
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 },
14411463
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i8, 4 },
1442-
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
1464+
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 7 },
14431465
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 4 },
14441466
{ ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
14451467
{ ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 4 },
1446-
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 4 },
1468+
{ ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 },
14471469
{ ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
14481470
{ ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
14491471
{ ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 4 },
@@ -1642,18 +1664,35 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
16421664
SimpleDstTy, SimpleSrcTy))
16431665
return Entry->Cost;
16441666

1667+
if (ST->hasAVX512() && ExperimentalVectorWideningLegalization)
1668+
if (const auto *Entry = ConvertCostTableLookup(AVX512FConversionTblWide, ISD,
1669+
SimpleDstTy, SimpleSrcTy))
1670+
return Entry->Cost;
1671+
16451672
if (ST->hasAVX512())
16461673
if (const auto *Entry = ConvertCostTableLookup(AVX512FConversionTbl, ISD,
16471674
SimpleDstTy, SimpleSrcTy))
16481675
return Entry->Cost;
16491676
}
16501677

1678+
if (ST->hasAVX2() && ExperimentalVectorWideningLegalization) {
1679+
if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTblWide, ISD,
1680+
SimpleDstTy, SimpleSrcTy))
1681+
return Entry->Cost;
1682+
}
1683+
16511684
if (ST->hasAVX2()) {
16521685
if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD,
16531686
SimpleDstTy, SimpleSrcTy))
16541687
return Entry->Cost;
16551688
}
16561689

1690+
if (ST->hasAVX() && ExperimentalVectorWideningLegalization) {
1691+
if (const auto *Entry = ConvertCostTableLookup(AVXConversionTblWide, ISD,
1692+
SimpleDstTy, SimpleSrcTy))
1693+
return Entry->Cost;
1694+
}
1695+
16571696
if (ST->hasAVX()) {
16581697
if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD,
16591698
SimpleDstTy, SimpleSrcTy))
@@ -2520,7 +2559,7 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
25202559
// in the table.
25212560
// FIXME: Is there a better way to do this?
25222561
EVT VT = TLI->getValueType(DL, ValTy);
2523-
if (VT.isSimple()) {
2562+
if (VT.isSimple() && ExperimentalVectorWideningLegalization) {
25242563
MVT MTy = VT.getSimpleVT();
25252564
if (IsPairwise) {
25262565
if (ST->hasAVX())

0 commit comments

Comments
 (0)
Please sign in to comment.