# Changeset View

Changeset View

# Standalone View

Standalone View

# llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp

Show First 20 Lines • Show All 318 Lines • ▼ Show 20 Line(s) | 316 | static const CostTblEntry AVX2CustomCostTable[] = { | |||
---|---|---|---|---|---|

319 | 319 | | |||

320 | { ISD::SRL, MVT::v32i8, 11 }, // vpblendvb sequence. | 320 | { ISD::SRL, MVT::v32i8, 11 }, // vpblendvb sequence. | ||

321 | { ISD::SRL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence. | 321 | { ISD::SRL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence. | ||

322 | 322 | | |||

323 | { ISD::SRA, MVT::v32i8, 24 }, // vpblendvb sequence. | 323 | { ISD::SRA, MVT::v32i8, 24 }, // vpblendvb sequence. | ||

324 | { ISD::SRA, MVT::v16i16, 10 }, // extend/vpsravd/pack sequence. | 324 | { ISD::SRA, MVT::v16i16, 10 }, // extend/vpsravd/pack sequence. | ||

325 | { ISD::SRA, MVT::v2i64, 4 }, // srl/xor/sub sequence. | 325 | { ISD::SRA, MVT::v2i64, 4 }, // srl/xor/sub sequence. | ||

326 | { ISD::SRA, MVT::v4i64, 4 }, // srl/xor/sub sequence. | 326 | { ISD::SRA, MVT::v4i64, 4 }, // srl/xor/sub sequence. | ||

327 | { ISD::FDIV, MVT::f32, 7 }, // Haswell from http://www.agner.org/ | ||||

328 | { ISD::FDIV, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/ | ||||

329 | { ISD::FDIV, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/ | ||||

330 | { ISD::FDIV, MVT::f64, 14 }, // Haswell from http://www.agner.org/ | ||||

331 | { ISD::FDIV, MVT::v2f64, 14 }, // Haswell from http://www.agner.org/ | ||||

332 | { ISD::FDIV, MVT::v4f64, 28 }, // Haswell from http://www.agner.org/ | ||||

327 | }; | 333 | }; | ||

328 | 334 | | |||

329 | // Look for AVX2 lowering tricks for custom cases. | 335 | // Look for AVX2 lowering tricks for custom cases. | ||

330 | if (ST->hasAVX2()) { | 336 | if (ST->hasAVX2()) { | ||

331 | if (const auto *Entry = CostTableLookup(AVX2CustomCostTable, ISD, | 337 | if (const auto *Entry = CostTableLookup(AVX2CustomCostTable, ISD, | ||

332 | LT.second)) | 338 | LT.second)) | ||

333 | return LT.first * Entry->Cost; | 339 | return LT.first * Entry->Cost; | ||

334 | } | 340 | } | ||

335 | 341 | | |||

336 | static const CostTblEntry AVXCustomCostTable[] = { | 342 | static const CostTblEntry AVXCustomCostTable[] = { | ||

343 | { ISD::FDIV, MVT::f32, 14 }, // SNB from http://www.agner.org/ | ||||

344 | { ISD::FDIV, MVT::v4f32, 14 }, // SNB from http://www.agner.org/ | ||||

345 | { ISD::FDIV, MVT::v8f32, 28 }, // SNB from http://www.agner.org/ | ||||

346 | { ISD::FDIV, MVT::f64, 22 }, // SNB from http://www.agner.org/ | ||||

347 | { ISD::FDIV, MVT::v2f64, 22 }, // SNB from http://www.agner.org/ | ||||

348 | { ISD::FDIV, MVT::v4f64, 44 }, // SNB from http://www.agner.org/ | ||||

337 | // Vectorizing division is a bad idea. See the SSE2 table for more comments. | 349 | // Vectorizing division is a bad idea. See the SSE2 table for more comments. | ||

338 | { ISD::SDIV, MVT::v32i8, 32*20 }, | 350 | { ISD::SDIV, MVT::v32i8, 32*20 }, | ||

339 | { ISD::SDIV, MVT::v16i16, 16*20 }, | 351 | { ISD::SDIV, MVT::v16i16, 16*20 }, | ||

340 | { ISD::SDIV, MVT::v8i32, 8*20 }, | 352 | { ISD::SDIV, MVT::v8i32, 8*20 }, | ||

341 | { ISD::SDIV, MVT::v4i64, 4*20 }, | 353 | { ISD::SDIV, MVT::v4i64, 4*20 }, | ||

342 | { ISD::UDIV, MVT::v32i8, 32*20 }, | 354 | { ISD::UDIV, MVT::v32i8, 32*20 }, | ||

343 | { ISD::UDIV, MVT::v16i16, 16*20 }, | 355 | { ISD::UDIV, MVT::v16i16, 16*20 }, | ||

344 | { ISD::UDIV, MVT::v8i32, 8*20 }, | 356 | { ISD::UDIV, MVT::v8i32, 8*20 }, | ||

345 | { ISD::UDIV, MVT::v4i64, 4*20 }, | 357 | { ISD::UDIV, MVT::v4i64, 4*20 }, | ||

346 | }; | 358 | }; | ||

347 | 359 | | |||

348 | // Look for AVX lowering tricks for custom cases. | 360 | // Look for AVX lowering tricks for custom cases. | ||

349 | if (ST->hasAVX()) { | 361 | if (ST->hasAVX()) { | ||

350 | if (const auto *Entry = CostTableLookup(AVXCustomCostTable, ISD, | 362 | if (const auto *Entry = CostTableLookup(AVXCustomCostTable, ISD, | ||

351 | LT.second)) | 363 | LT.second)) | ||

352 | return LT.first * Entry->Cost; | 364 | return LT.first * Entry->Cost; | ||

353 | } | 365 | } | ||

354 | 366 | | |||

367 | static const CostTblEntry SSE42FloatCostTable[] = { | ||||

368 | { ISD::FDIV, MVT::f32, 14 }, // Nehalem from http://www.agner.org/ | ||||

369 | { ISD::FDIV, MVT::v4f32, 14 }, // Nehalem from http://www.agner.org/ | ||||

370 | { ISD::FDIV, MVT::f64, 22 }, // Nehalem from http://www.agner.org/ | ||||

371 | { ISD::FDIV, MVT::v2f64, 22 }, // Nehalem from http://www.agner.org/ | ||||

372 | }; | ||||

373 | | ||||

374 | if (ST->hasSSE42()) { | ||||

375 | if (const auto *Entry = CostTableLookup(SSE42FloatCostTable, ISD, | ||||

376 | LT.second)) | ||||

377 | return LT.first * Entry->Cost; | ||||

378 | } | ||||

379 | | ||||

355 | static const CostTblEntry | 380 | static const CostTblEntry | ||

356 | SSE2UniformCostTable[] = { | 381 | SSE2UniformCostTable[] = { | ||

357 | // Uniform splats are cheaper for the following instructions. | 382 | // Uniform splats are cheaper for the following instructions. | ||

358 | { ISD::SHL, MVT::v16i8, 1 }, // psllw. | 383 | { ISD::SHL, MVT::v16i8, 1 }, // psllw. | ||

359 | { ISD::SHL, MVT::v32i8, 2 }, // psllw. | 384 | { ISD::SHL, MVT::v32i8, 2 }, // psllw. | ||

360 | { ISD::SHL, MVT::v8i16, 1 }, // psllw. | 385 | { ISD::SHL, MVT::v8i16, 1 }, // psllw. | ||

361 | { ISD::SHL, MVT::v16i16, 2 }, // psllw. | 386 | { ISD::SHL, MVT::v16i16, 2 }, // psllw. | ||

362 | { ISD::SHL, MVT::v4i32, 1 }, // pslld | 387 | { ISD::SHL, MVT::v4i32, 1 }, // pslld | ||

▲ Show 20 Lines • Show All 101 Lines • ▼ Show 20 Line(s) | 467 | static const CostTblEntry SSE2CostTable[] = { | |||

464 | { ISD::SRA, MVT::v32i8, 2*54 }, // unpacked cmpgtb sequence. | 489 | { ISD::SRA, MVT::v32i8, 2*54 }, // unpacked cmpgtb sequence. | ||

465 | { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence. | 490 | { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence. | ||

466 | { ISD::SRA, MVT::v16i16, 2*32 }, // cmpgtb sequence. | 491 | { ISD::SRA, MVT::v16i16, 2*32 }, // cmpgtb sequence. | ||

467 | { ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend. | 492 | { ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend. | ||

468 | { ISD::SRA, MVT::v8i32, 2*16 }, // Shift each lane + blend. | 493 | { ISD::SRA, MVT::v8i32, 2*16 }, // Shift each lane + blend. | ||

469 | { ISD::SRA, MVT::v2i64, 12 }, // srl/xor/sub sequence. | 494 | { ISD::SRA, MVT::v2i64, 12 }, // srl/xor/sub sequence. | ||

470 | { ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence. | 495 | { ISD::SRA, MVT::v4i64, 2*12 }, // srl/xor/sub sequence. | ||

471 | 496 | | |||

497 | { ISD::FDIV, MVT::f32, 23 }, // Pentium IV from http://www.agner.org/ | ||||

498 | { ISD::FDIV, MVT::v4f32, 39 }, // Pentium IV from http://www.agner.org/ | ||||

499 | { ISD::FDIV, MVT::f64, 38 }, // Pentium IV from http://www.agner.org/ | ||||

500 | { ISD::FDIV, MVT::v2f64, 69 }, // Pentium IV from http://www.agner.org/ | ||||

501 | | ||||

472 | // It is not a good idea to vectorize division. We have to scalarize it and | 502 | // It is not a good idea to vectorize division. We have to scalarize it and | ||

473 | // in the process we will often end up having to spill regular | 503 | // in the process we will often end up having to spill regular | ||

474 | // registers. The overhead of division is going to dominate most kernels | 504 | // registers. The overhead of division is going to dominate most kernels | ||

475 | // anyways so try hard to prevent vectorization of division - it is | 505 | // anyways so try hard to prevent vectorization of division - it is | ||

476 | // generally a bad idea. Assume somewhat arbitrarily that we have to be able | 506 | // generally a bad idea. Assume somewhat arbitrarily that we have to be able | ||

477 | // to hide "20 cycles" for each lane. | 507 | // to hide "20 cycles" for each lane. | ||

478 | { ISD::SDIV, MVT::v16i8, 16*20 }, | 508 | { ISD::SDIV, MVT::v16i8, 16*20 }, | ||

479 | { ISD::SDIV, MVT::v8i16, 8*20 }, | 509 | { ISD::SDIV, MVT::v8i16, 8*20 }, | ||

▲ Show 20 Lines • Show All 48 Lines • ▼ Show 20 Line(s) | 557 | if (const auto *Entry = CostTableLookup(CustomLowered, ISD, LT.second)) | |||

528 | return LT.first * Entry->Cost; | 558 | return LT.first * Entry->Cost; | ||

529 | 559 | | |||

530 | // Special lowering of v4i32 mul on sse2, sse3: Lower v4i32 mul as 2x shuffle, | 560 | // Special lowering of v4i32 mul on sse2, sse3: Lower v4i32 mul as 2x shuffle, | ||

531 | // 2x pmuludq, 2x shuffle. | 561 | // 2x pmuludq, 2x shuffle. | ||

532 | if (ISD == ISD::MUL && LT.second == MVT::v4i32 && ST->hasSSE2() && | 562 | if (ISD == ISD::MUL && LT.second == MVT::v4i32 && ST->hasSSE2() && | ||

533 | !ST->hasSSE41()) | 563 | !ST->hasSSE41()) | ||

534 | return LT.first * 6; | 564 | return LT.first * 6; | ||

535 | 565 | | |||

566 | static const CostTblEntry SSE1FloatCostTable[] = { | ||||

567 | { ISD::FDIV, MVT::f32, 17 }, // Pentium III from http://www.agner.org/ | ||||

568 | { ISD::FDIV, MVT::v4f32, 34 }, // Pentium III from http://www.agner.org/ | ||||

569 | }; | ||||

570 | | ||||

571 | if (ST->hasSSE1()) | ||||

572 | if (const auto *Entry = CostTableLookup(SSE1FloatCostTable, ISD, | ||||

573 | LT.second)) | ||||

574 | return LT.first * Entry->Cost; | ||||

536 | // Fallback to the default implementation. | 575 | // Fallback to the default implementation. | ||

537 | return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info); | 576 | return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info); | ||

538 | } | 577 | } | ||

539 | 578 | | |||

540 | int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, | 579 | int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, | ||

541 | Type *SubTp) { | 580 | Type *SubTp) { | ||

542 | // We only estimate the cost of reverse and alternate shuffles. | 581 | // We only estimate the cost of reverse and alternate shuffles. | ||

543 | if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate) | 582 | if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate) | ||

▲ Show 20 Lines • Show All 562 Lines • ▼ Show 20 Line(s) | 1134 | static const CostTblEntry AVX2CostTbl[] = { | |||

1106 | { ISD::CTLZ, MVT::v32i8, 9 }, | 1145 | { ISD::CTLZ, MVT::v32i8, 9 }, | ||

1107 | { ISD::CTPOP, MVT::v4i64, 7 }, | 1146 | { ISD::CTPOP, MVT::v4i64, 7 }, | ||

1108 | { ISD::CTPOP, MVT::v8i32, 11 }, | 1147 | { ISD::CTPOP, MVT::v8i32, 11 }, | ||

1109 | { ISD::CTPOP, MVT::v16i16, 9 }, | 1148 | { ISD::CTPOP, MVT::v16i16, 9 }, | ||

1110 | { ISD::CTPOP, MVT::v32i8, 6 }, | 1149 | { ISD::CTPOP, MVT::v32i8, 6 }, | ||

1111 | { ISD::CTTZ, MVT::v4i64, 10 }, | 1150 | { ISD::CTTZ, MVT::v4i64, 10 }, | ||

1112 | { ISD::CTTZ, MVT::v8i32, 14 }, | 1151 | { ISD::CTTZ, MVT::v8i32, 14 }, | ||

1113 | { ISD::CTTZ, MVT::v16i16, 12 }, | 1152 | { ISD::CTTZ, MVT::v16i16, 12 }, | ||

1114 | { ISD::CTTZ, MVT::v32i8, 9 } | 1153 | { ISD::CTTZ, MVT::v32i8, 9 }, | ||

1154 | { ISD::FSQRT, MVT::f32, 7 }, // Haswell from http://www.agner.org/ | ||||

1155 | { ISD::FSQRT, MVT::v4f32, 7 }, // Haswell from http://www.agner.org/ | ||||

1156 | { ISD::FSQRT, MVT::v8f32, 14 }, // Haswell from http://www.agner.org/ | ||||

1157 | { ISD::FSQRT, MVT::f64, 14 }, // Haswell from http://www.agner.org/ | ||||

1158 | { ISD::FSQRT, MVT::v2f64, 14 }, // Haswell from http://www.agner.org/ | ||||

1159 | { ISD::FSQRT, MVT::v4f64, 28 }, // Haswell from http://www.agner.org/ | ||||

1115 | }; | 1160 | }; | ||

1116 | static const CostTblEntry AVX1CostTbl[] = { | 1161 | static const CostTblEntry AVX1CostTbl[] = { | ||

1117 | { ISD::BITREVERSE, MVT::v4i64, 10 }, | 1162 | { ISD::BITREVERSE, MVT::v4i64, 10 }, | ||

1118 | { ISD::BITREVERSE, MVT::v8i32, 10 }, | 1163 | { ISD::BITREVERSE, MVT::v8i32, 10 }, | ||

1119 | { ISD::BITREVERSE, MVT::v16i16, 10 }, | 1164 | { ISD::BITREVERSE, MVT::v16i16, 10 }, | ||

1120 | { ISD::BITREVERSE, MVT::v32i8, 10 }, | 1165 | { ISD::BITREVERSE, MVT::v32i8, 10 }, | ||

1121 | { ISD::BSWAP, MVT::v4i64, 4 }, | 1166 | { ISD::BSWAP, MVT::v4i64, 4 }, | ||

1122 | { ISD::BSWAP, MVT::v8i32, 4 }, | 1167 | { ISD::BSWAP, MVT::v8i32, 4 }, | ||

1123 | { ISD::BSWAP, MVT::v16i16, 4 }, | 1168 | { ISD::BSWAP, MVT::v16i16, 4 }, | ||

1124 | { ISD::CTLZ, MVT::v4i64, 46 }, | 1169 | { ISD::CTLZ, MVT::v4i64, 46 }, | ||

1125 | { ISD::CTLZ, MVT::v8i32, 36 }, | 1170 | { ISD::CTLZ, MVT::v8i32, 36 }, | ||

1126 | { ISD::CTLZ, MVT::v16i16, 28 }, | 1171 | { ISD::CTLZ, MVT::v16i16, 28 }, | ||

1127 | { ISD::CTLZ, MVT::v32i8, 18 }, | 1172 | { ISD::CTLZ, MVT::v32i8, 18 }, | ||

1128 | { ISD::CTPOP, MVT::v4i64, 14 }, | 1173 | { ISD::CTPOP, MVT::v4i64, 14 }, | ||

1129 | { ISD::CTPOP, MVT::v8i32, 22 }, | 1174 | { ISD::CTPOP, MVT::v8i32, 22 }, | ||

1130 | { ISD::CTPOP, MVT::v16i16, 18 }, | 1175 | { ISD::CTPOP, MVT::v16i16, 18 }, | ||

1131 | { ISD::CTPOP, MVT::v32i8, 12 }, | 1176 | { ISD::CTPOP, MVT::v32i8, 12 }, | ||

1132 | { ISD::CTTZ, MVT::v4i64, 20 }, | 1177 | { ISD::CTTZ, MVT::v4i64, 20 }, | ||

1133 | { ISD::CTTZ, MVT::v8i32, 28 }, | 1178 | { ISD::CTTZ, MVT::v8i32, 28 }, | ||

1134 | { ISD::CTTZ, MVT::v16i16, 24 }, | 1179 | { ISD::CTTZ, MVT::v16i16, 24 }, | ||

1135 | { ISD::CTTZ, MVT::v32i8, 18 }, | 1180 | { ISD::CTTZ, MVT::v32i8, 18 }, | ||

1181 | { ISD::FSQRT, MVT::f32, 14 }, // SNB from http://www.agner.org/ | ||||

1182 | { ISD::FSQRT, MVT::v4f32, 14 }, // SNB from http://www.agner.org/ | ||||

1183 | { ISD::FSQRT, MVT::v8f32, 28 }, // SNB from http://www.agner.org/ | ||||

1184 | { ISD::FSQRT, MVT::f64, 21 }, // SNB from http://www.agner.org/ | ||||

1185 | { ISD::FSQRT, MVT::v2f64, 21 }, // SNB from http://www.agner.org/ | ||||

1186 | { ISD::FSQRT, MVT::v4f64, 43 }, // SNB from http://www.agner.org/ | ||||

1187 | }; | ||||

1188 | static const CostTblEntry SSE42CostTbl[] = { | ||||

1189 | { ISD::FSQRT, MVT::f32, 18 }, // Nehalem from http://www.agner.org/ | ||||

1190 | { ISD::FSQRT, MVT::v4f32, 18 }, // Nehalem from http://www.agner.org/ | ||||

1136 | }; | 1191 | }; | ||

1137 | static const CostTblEntry SSSE3CostTbl[] = { | 1192 | static const CostTblEntry SSSE3CostTbl[] = { | ||

1138 | { ISD::BITREVERSE, MVT::v2i64, 5 }, | 1193 | { ISD::BITREVERSE, MVT::v2i64, 5 }, | ||

1139 | { ISD::BITREVERSE, MVT::v4i32, 5 }, | 1194 | { ISD::BITREVERSE, MVT::v4i32, 5 }, | ||

1140 | { ISD::BITREVERSE, MVT::v8i16, 5 }, | 1195 | { ISD::BITREVERSE, MVT::v8i16, 5 }, | ||

1141 | { ISD::BITREVERSE, MVT::v16i8, 5 }, | 1196 | { ISD::BITREVERSE, MVT::v16i8, 5 }, | ||

1142 | { ISD::BSWAP, MVT::v2i64, 1 }, | 1197 | { ISD::BSWAP, MVT::v2i64, 1 }, | ||

1143 | { ISD::BSWAP, MVT::v4i32, 1 }, | 1198 | { ISD::BSWAP, MVT::v4i32, 1 }, | ||

Show All 18 Lines | 1213 | static const CostTblEntry SSE2CostTbl[] = { | |||

1162 | /* ISD::CTLZ - currently scalarized pre-SSSE3 */ | 1217 | /* ISD::CTLZ - currently scalarized pre-SSSE3 */ | ||

1163 | { ISD::CTPOP, MVT::v2i64, 12 }, | 1218 | { ISD::CTPOP, MVT::v2i64, 12 }, | ||

1164 | { ISD::CTPOP, MVT::v4i32, 15 }, | 1219 | { ISD::CTPOP, MVT::v4i32, 15 }, | ||

1165 | { ISD::CTPOP, MVT::v8i16, 13 }, | 1220 | { ISD::CTPOP, MVT::v8i16, 13 }, | ||

1166 | { ISD::CTPOP, MVT::v16i8, 10 }, | 1221 | { ISD::CTPOP, MVT::v16i8, 10 }, | ||

1167 | { ISD::CTTZ, MVT::v2i64, 14 }, | 1222 | { ISD::CTTZ, MVT::v2i64, 14 }, | ||

1168 | { ISD::CTTZ, MVT::v4i32, 18 }, | 1223 | { ISD::CTTZ, MVT::v4i32, 18 }, | ||

1169 | { ISD::CTTZ, MVT::v8i16, 16 }, | 1224 | { ISD::CTTZ, MVT::v8i16, 16 }, | ||

1170 | { ISD::CTTZ, MVT::v16i8, 13 } | 1225 | { ISD::CTTZ, MVT::v16i8, 13 }, | ||

1226 | { ISD::FSQRT, MVT::f64, 32 }, // Nehalem from http://www.agner.org/ | ||||

1227 | { ISD::FSQRT, MVT::v2f64, 32 }, // Nehalem from http://www.agner.org/ | ||||

1228 | }; | ||||

1229 | static const CostTblEntry SSE1CostTbl[] = { | ||||

1230 | { ISD::FSQRT, MVT::f32, 28 }, // Pentium III from http://www.agner.org/ | ||||

1231 | { ISD::FSQRT, MVT::v4f32, 56 }, // Pentium III from http://www.agner.org/ | ||||

1171 | }; | 1232 | }; | ||

1172 | 1233 | | |||

1173 | unsigned ISD = ISD::DELETED_NODE; | 1234 | unsigned ISD = ISD::DELETED_NODE; | ||

1174 | switch (IID) { | 1235 | switch (IID) { | ||

1175 | default: | 1236 | default: | ||

1176 | break; | 1237 | break; | ||

1177 | case Intrinsic::bitreverse: | 1238 | case Intrinsic::bitreverse: | ||

1178 | ISD = ISD::BITREVERSE; | 1239 | ISD = ISD::BITREVERSE; | ||

1179 | break; | 1240 | break; | ||

1180 | case Intrinsic::bswap: | 1241 | case Intrinsic::bswap: | ||

1181 | ISD = ISD::BSWAP; | 1242 | ISD = ISD::BSWAP; | ||

1182 | break; | 1243 | break; | ||

1183 | case Intrinsic::ctlz: | 1244 | case Intrinsic::ctlz: | ||

1184 | ISD = ISD::CTLZ; | 1245 | ISD = ISD::CTLZ; | ||

1185 | break; | 1246 | break; | ||

1186 | case Intrinsic::ctpop: | 1247 | case Intrinsic::ctpop: | ||

1187 | ISD = ISD::CTPOP; | 1248 | ISD = ISD::CTPOP; | ||

1188 | break; | 1249 | break; | ||

1189 | case Intrinsic::cttz: | 1250 | case Intrinsic::cttz: | ||

1190 | ISD = ISD::CTTZ; | 1251 | ISD = ISD::CTTZ; | ||

1191 | break; | 1252 | break; | ||

1253 | case Intrinsic::sqrt: | ||||

1254 | ISD = ISD::FSQRT; | ||||

1255 | break; | ||||

1192 | } | 1256 | } | ||

1193 | 1257 | | |||

1194 | // Legalize the type. | 1258 | // Legalize the type. | ||

1195 | std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy); | 1259 | std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy); | ||

1196 | MVT MTy = LT.second; | 1260 | MVT MTy = LT.second; | ||

1197 | 1261 | | |||

1198 | // Attempt to lookup cost. | 1262 | // Attempt to lookup cost. | ||

1199 | if (ST->hasXOP()) | 1263 | if (ST->hasXOP()) | ||

1200 | if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy)) | 1264 | if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy)) | ||

1201 | return LT.first * Entry->Cost; | 1265 | return LT.first * Entry->Cost; | ||

1202 | 1266 | | |||

1203 | if (ST->hasAVX2()) | 1267 | if (ST->hasAVX2()) | ||

1204 | if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy)) | 1268 | if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy)) | ||

1205 | return LT.first * Entry->Cost; | 1269 | return LT.first * Entry->Cost; | ||

1206 | 1270 | | |||

1207 | if (ST->hasAVX()) | 1271 | if (ST->hasAVX()) | ||

1208 | if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy)) | 1272 | if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy)) | ||

1209 | return LT.first * Entry->Cost; | 1273 | return LT.first * Entry->Cost; | ||

1210 | 1274 | | |||

1275 | if (ST->hasSSE42()) | ||||

1276 | if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy)) | ||||

1277 | return LT.first * Entry->Cost; | ||||

1278 | | ||||

1211 | if (ST->hasSSSE3()) | 1279 | if (ST->hasSSSE3()) | ||

1212 | if (const auto *Entry = CostTableLookup(SSSE3CostTbl, ISD, MTy)) | 1280 | if (const auto *Entry = CostTableLookup(SSSE3CostTbl, ISD, MTy)) | ||

1213 | return LT.first * Entry->Cost; | 1281 | return LT.first * Entry->Cost; | ||

1214 | 1282 | | |||

1215 | if (ST->hasSSE2()) | 1283 | if (ST->hasSSE2()) | ||

1216 | if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy)) | 1284 | if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy)) | ||

1217 | return LT.first * Entry->Cost; | 1285 | return LT.first * Entry->Cost; | ||

1218 | 1286 | | |||

1287 | if (ST->hasSSE1()) | ||||

1288 | if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy)) | ||||

1289 | return LT.first * Entry->Cost; | ||||

1290 | | ||||

1219 | return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF); | 1291 | return BaseT::getIntrinsicInstrCost(IID, RetTy, Tys, FMF); | ||

1220 | } | 1292 | } | ||

1221 | 1293 | | |||

1222 | int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, | 1294 | int X86TTIImpl::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, | ||

1223 | ArrayRef<Value *> Args, FastMathFlags FMF) { | 1295 | ArrayRef<Value *> Args, FastMathFlags FMF) { | ||

1224 | return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF); | 1296 | return BaseT::getIntrinsicInstrCost(IID, RetTy, Args, FMF); | ||

1225 | } | 1297 | } | ||

1226 | 1298 | | |||

▲ Show 20 Lines • Show All 590 Lines • Show Last 20 Lines |