Index: include/llvm/MC/MCSchedule.h
===================================================================
--- include/llvm/MC/MCSchedule.h
+++ include/llvm/MC/MCSchedule.h
@@ -22,6 +22,8 @@
 
 namespace llvm {
 
+template <typename T> class ArrayRef;
+
 struct InstrItinerary;
 class MCSubtargetInfo;
 class MCInstrInfo;
@@ -373,6 +375,15 @@
   getReciprocalThroughput(const MCSubtargetInfo &STI, const MCInstrInfo &MCII,
                           const MCInst &Inst) const;
 
+  /// Returns the reciprocal throughput of the instruction sequence
+  /// \p MIOpcodes. Instruction dependencies are ignored: the aim is to
+  /// estimate the resource pressure of the sequence, such as when it is
+  /// used in a loop. \p MIOpcodes must not be empty.
+  double
+  getReciprocalThroughput(ArrayRef<unsigned> MIOpcodes,
+                          const MCInstrInfo &MCII,
+                          const MCSubtargetInfo &STI) const;
+
   /// Returns the default initialized model.
   static const MCSchedModel &GetDefaultSchedModel() { return Default; }
   static const MCSchedModel Default;
Index: lib/MC/MCSchedule.cpp
===================================================================
--- lib/MC/MCSchedule.cpp
+++ lib/MC/MCSchedule.cpp
@@ -150,3 +150,56 @@
   // that it can execute at the maximum default issue width.
   return 1.0 / DefaultIssueWidth;
 }
+
+double MCSchedModel::getReciprocalThroughput(ArrayRef<unsigned> MIOpcodes,
+                                             const MCInstrInfo &MCII,
+                                             const MCSubtargetInfo &STI) const {
+  assert(!MIOpcodes.empty() && "Empty instruction sequence");
+  unsigned CPUID = getProcessorID();
+
+  // Add all resource usages together for all instructions.
+  unsigned NumMicroOps = 0;
+  unsigned NumResources = getNumProcResourceKinds();
+  SmallVector<unsigned, 16> ResourceUsage(NumResources, 0);
+  for (unsigned MIOpcode : MIOpcodes) {
+    unsigned SCIdx = MCII.get(MIOpcode).getSchedClass();
+    const MCSchedClassDesc *SCDesc = getSchedClassDesc(SCIdx);
+    // NOTE(review): only opcodes are known here, so variant classes are
+    // resolved with a null instruction - confirm the resolver tolerates it.
+    while (SCDesc->isVariant()) {
+      SCIdx = STI.resolveVariantSchedClass(SCIdx, nullptr, CPUID);
+      SCDesc = getSchedClassDesc(SCIdx);
+    }
+
+    // A class the model cannot describe presumably carries no usable
+    // micro-op or resource data; count it as a single micro-op instead.
+    if (!SCDesc->isValid()) {
+      ++NumMicroOps;
+      continue;
+    }
+
+    NumMicroOps += SCDesc->NumMicroOps;
+    for (const MCWriteProcResEntry *WPR = STI.getWriteProcResBegin(SCDesc),
+                                   *WEnd = STI.getWriteProcResEnd(SCDesc);
+         WPR != WEnd; ++WPR)
+      ResourceUsage[WPR->ProcResourceIdx] += WPR->Cycles;
+  }
+
+  // The sequence is bound by its busiest resource: the largest cycles-per-unit
+  // ratio is the reciprocal throughput. Dividing directly (rather than taking
+  // the reciprocal twice) keeps integral ratios exact.
+  double RThroughput = 0.0;
+  for (unsigned ResIdx = 0; ResIdx != NumResources; ++ResIdx) {
+    if (unsigned NumCycles = ResourceUsage[ResIdx]) {
+      unsigned NumUnits = getProcResource(ResIdx)->NumUnits;
+      RThroughput =
+          std::max(RThroughput, static_cast<double>(NumCycles) / NumUnits);
+    }
+  }
+  if (RThroughput > 0.0)
+    return RThroughput;
+
+  // If no throughput value was calculated, assume that we can execute at the
+  // maximum issue width scaled by number of micro-ops for the schedule class.
+  return static_cast<double>(NumMicroOps) / IssueWidth;
+}
Index: lib/Target/X86/X86ScheduleBtVer2.td
===================================================================
--- lib/Target/X86/X86ScheduleBtVer2.td
+++ lib/Target/X86/X86ScheduleBtVer2.td
@@ -22,10 +22,7 @@
   let HighLatency = 25;
   let MispredictPenalty = 14; // Minimum branch misdirection penalty
   let PostRAScheduler = 1;
-
-  // FIXME: SSE4/AVX is unimplemented. This flag is set to allow
-  // the scheduler to assign a default model to unrecognized opcodes.
-  let CompleteModel = 0;
+  let CompleteModel = 1;
 }
 
 let SchedModel = BtVer2Model in {
Index: lib/Target/X86/X86Subtarget.h
===================================================================
--- lib/Target/X86/X86Subtarget.h
+++ lib/Target/X86/X86Subtarget.h
@@ -468,6 +468,13 @@
                                unsigned PreferVectorWidthOverride,
                                unsigned RequiredVectorWidth);
 
+  /// Returns the target machine viewed as an X86TargetMachine.
+  /// NOTE(review): reinterpret_cast assumes TM really is an X86TargetMachine
+  /// whose base subobject sits at offset zero - TODO confirm.
+  const X86TargetMachine *getTargetMachine() const {
+    return reinterpret_cast<const X86TargetMachine *>(&TM);
+  }
+
   const X86TargetLowering *getTargetLowering() const override {
     return &TLInfo;
   }
Index: lib/Target/X86/X86TargetTransformInfo.cpp
===================================================================
--- lib/Target/X86/X86TargetTransformInfo.cpp
+++ lib/Target/X86/X86TargetTransformInfo.cpp
@@ -57,6 +57,49 @@
 //
 //===----------------------------------------------------------------------===//
 
+namespace {
+/// Schedule cost table entry. With a complete instruction-level scheduling
+/// model the cost is the reciprocal throughput of the MIOpcodes sequence (if
+/// it is non-empty); otherwise DefaultCost is used in the same manner as
+/// CostTblEntry.
+struct SchedCostTblEntry {
+  int ISD;
+  MVT::SimpleValueType Type;
+  unsigned DefaultCost;
+  // Reference member: aggregate initialization extends the lifetime of the
+  // braced opcode list to that of the entry, so tables must be built that way.
+  const std::initializer_list<unsigned> &MIOpcodes;
+};
+} // namespace
+
+/// Return the entry of \p Tbl matching \p ISD and \p Ty, or nullptr if none.
+static const SchedCostTblEntry *
+SchedCostTableLookup(ArrayRef<SchedCostTblEntry> Tbl, int ISD, MVT Ty) {
+  auto I = find_if(Tbl, [=](const SchedCostTblEntry &Entry) {
+    return ISD == Entry.ISD && Ty == Entry.Type;
+  });
+  return I != Tbl.end() ? I : nullptr;
+}
+
+/// For a complete (instruction-level) scheduler, determine the cost from the
+/// SchedCostTblEntry::MIOpcodes sequence, else use DefaultCost.
+static int getSchedModelCost(const SchedCostTblEntry *Entry,
+                             const X86Subtarget *ST) {
+  assert(Entry && ST && "Expected a valid cost table entry and subtarget");
+  const MCSchedModel &SM = ST->getSchedModel();
+  if (Entry->MIOpcodes.size() == 0 || !SM.hasInstrSchedModel() ||
+      !SM.CompleteModel)
+    return Entry->DefaultCost;
+
+  // TODO - multiplying Cost by SM.IssueWidth would improve accuracy of the
+  // costs for scalar vs vector but would need to be done for all cases,
+  // not just SchedCostTblEntry cases.
+  const MCInstrInfo &MCII = *ST->getInstrInfo();
+  double Cost = SM.getReciprocalThroughput(Entry->MIOpcodes, MCII, *ST);
+  // Costs are integral and never free: truncate, then clamp to at least 1.
+  return std::max(static_cast<int>(Cost), 1);
+}
+
 TargetTransformInfo::PopcntSupportKind
 X86TTIImpl::getPopcntSupport(unsigned TyWidth) {
   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
@@ -650,41 +693,328 @@
     if (const auto *Entry = CostTableLookup(AVX2CostTable, ISD, LT.second))
       return LT.first * Entry->Cost;
 
-  static const CostTblEntry AVX1CostTable[] = {
-    // We don't have to scalarize unsupported ops. We can issue two half-sized
-    // operations and we only need to extract the upper YMM half.
-    // Two ops + 1 extract + 1 insert = 4.
-    { ISD::MUL, MVT::v16i16, 4 },
-    { ISD::MUL, MVT::v8i32, 4 },
-    { ISD::SUB, MVT::v32i8, 4 },
-    { ISD::ADD, MVT::v32i8, 4 },
-    { ISD::SUB, MVT::v16i16, 4 },
-    { ISD::ADD, MVT::v16i16, 4 },
-    { ISD::SUB, MVT::v8i32, 4 },
-    { ISD::ADD, MVT::v8i32, 4 },
-    { ISD::SUB, MVT::v4i64, 4 },
-    { ISD::ADD, MVT::v4i64, 4 },
-
-    // A v4i64 multiply is custom lowered as two split v2i64 vectors that then
-    // are lowered as a series of long multiplies(3), shifts(3) and adds(2)
-    // Because we believe v4i64 to be a legal type, we must also include the
-    // extract+insert in the cost table. Therefore, the cost here is 18
-    // instead of 8.
-    { ISD::MUL, MVT::v4i64, 18 },
-
-    { ISD::MUL, MVT::v32i8, 26 }, // extend/pmullw/trunc sequence.
- - { ISD::FDIV, MVT::f32, 14 }, // SNB from http://www.agner.org/ - { ISD::FDIV, MVT::v4f32, 14 }, // SNB from http://www.agner.org/ - { ISD::FDIV, MVT::v8f32, 28 }, // SNB from http://www.agner.org/ - { ISD::FDIV, MVT::f64, 22 }, // SNB from http://www.agner.org/ - { ISD::FDIV, MVT::v2f64, 22 }, // SNB from http://www.agner.org/ - { ISD::FDIV, MVT::v4f64, 44 }, // SNB from http://www.agner.org/ + static const SchedCostTblEntry AVX1CostTable[] = { + { ISD::FADD, MVT::v4f64, 2, { X86::VADDPDYrr } }, + { ISD::FADD, MVT::v8f32, 2, { X86::VADDPSYrr } }, + { ISD::FADD, MVT::v2f64, 1, { X86::VADDPDrr } }, + { ISD::FADD, MVT::v4f32, 1, { X86::VADDPSrr } }, + { ISD::FADD, MVT::f64, 1, { X86::VADDSDrr } }, + { ISD::FADD, MVT::f32, 1, { X86::VADDSSrr } }, + + { ISD::FSUB, MVT::v4f64, 2, { X86::VSUBPDYrr } }, + { ISD::FSUB, MVT::v8f32, 2, { X86::VSUBPSYrr } }, + { ISD::FSUB, MVT::v2f64, 1, { X86::VSUBPDrr } }, + { ISD::FSUB, MVT::v4f32, 1, { X86::VSUBPSrr } }, + { ISD::FSUB, MVT::f64, 1, { X86::VSUBSDrr } }, + { ISD::FSUB, MVT::f32, 1, { X86::VSUBSSrr } }, + + { ISD::FMUL, MVT::v4f64, 2, { X86::VMULPDYrr } }, + { ISD::FMUL, MVT::v8f32, 2, { X86::VMULPSYrr } }, + { ISD::FMUL, MVT::v2f64, 1, { X86::VMULPDrr } }, + { ISD::FMUL, MVT::v4f32, 1, { X86::VMULPSrr } }, + { ISD::FMUL, MVT::f64, 1, { X86::VMULSDrr } }, + { ISD::FMUL, MVT::f32, 1, { X86::VMULSSrr } }, + + // SNB from http://www.agner.org/ + { ISD::FDIV, MVT::v4f64, 44, { X86::VDIVPDYrr } }, + { ISD::FDIV, MVT::v8f32, 28, { X86::VDIVPSYrr } }, + { ISD::FDIV, MVT::v2f64, 22, { X86::VDIVPDrr } }, + { ISD::FDIV, MVT::v4f32, 14, { X86::VDIVPSrr } }, + { ISD::FDIV, MVT::f64, 22, { X86::VDIVSDrr } }, + { ISD::FDIV, MVT::f32, 14, { X86::VDIVSSrr } }, + + { ISD::ADD, MVT::v4i64, 4, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPADDQrr, X86::VPADDQrr, + X86::VINSERTF128rr } }, + { ISD::ADD, MVT::v8i32, 4, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPADDDrr, X86::VPADDDrr, + X86::VINSERTF128rr } }, + { ISD::ADD, 
MVT::v16i16, 4, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPADDWrr, X86::VPADDWrr, + X86::VINSERTF128rr } }, + { ISD::ADD, MVT::v32i8, 4, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPADDBrr, X86::VPADDBrr, + X86::VINSERTF128rr } }, + { ISD::ADD, MVT::v2i64, 1, { X86::VPADDQrr } }, + { ISD::ADD, MVT::v4i32, 1, { X86::VPADDDrr } }, + { ISD::ADD, MVT::v8i16, 1, { X86::VPADDWrr } }, + { ISD::ADD, MVT::v16i8, 1, { X86::VPADDBrr } }, + + { ISD::SUB, MVT::v4i64, 4, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPSUBQrr, X86::VPSUBQrr, + X86::VINSERTF128rr } }, + { ISD::SUB, MVT::v8i32, 4, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPSUBDrr, X86::VPSUBDrr, + X86::VINSERTF128rr } }, + { ISD::SUB, MVT::v16i16, 4, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPSUBWrr, X86::VPSUBWrr, + X86::VINSERTF128rr } }, + { ISD::SUB, MVT::v32i8, 4, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPSUBBrr, X86::VPSUBBrr, + X86::VINSERTF128rr } }, + { ISD::SUB, MVT::v2i64, 1, { X86::VPSUBQrr } }, + { ISD::SUB, MVT::v4i32, 1, { X86::VPSUBDrr } }, + { ISD::SUB, MVT::v8i16, 1, { X86::VPSUBWrr } }, + { ISD::SUB, MVT::v16i8, 1, { X86::VPSUBBrr } }, + + { ISD::MUL, MVT::v4i64, 18, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPSRLQri, X86::VPSRLQri, + X86::VPSRLQri, X86::VPSRLQri, + X86::VPMULUDQrr, X86::VPMULUDQrr, X86::VPMULUDQrr, + X86::VPMULUDQrr, X86::VPMULUDQrr, X86::VPMULUDQrr, + X86::VPSLLQri, X86::VPADDQrr, X86::VPADDQrr, + X86::VPSLLQri, X86::VPADDQrr, X86::VPADDQrr, + X86::VINSERTF128rr } }, + { ISD::MUL, MVT::v8i32, 4, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPMULLDrr, X86::VPMULLDrr, + X86::VINSERTF128rr } }, + { ISD::MUL, MVT::v16i16, 4, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPMULLWrr, X86::VPMULLWrr, + X86::VINSERTF128rr} }, + { ISD::MUL, MVT::v32i8, 26, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrr, + X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrr, + X86::VPMOVZXBWrr, X86::VPMOVZXBWrr, 
+ X86::VPMOVZXBWrr, X86::VPMOVZXBWrr, + X86::VPMULLWrr, X86::VPMULLWrr, + X86::VPMULLWrr, X86::VPMULLWrr, + X86::VPANDrr, X86::VPANDrr, X86::VPACKUSWBrr, + X86::VPANDrr, X86::VPANDrr, X86::VPACKUSWBrr, + X86::VINSERTF128rr } }, + { ISD::MUL, MVT::v2i64, 8, { X86::VPSRLQri, X86::VPSRLQri, + X86::VPMULUDQrr, X86::VPMULUDQrr, X86::VPMULUDQrr, + X86::VPSLLQri, X86::VPADDQrr, X86::VPADDQrr } }, + { ISD::MUL, MVT::v4i32, 2, { X86::VPMULLDrr } }, + { ISD::MUL, MVT::v8i16, 1, { X86::VPMULLWrr } }, + { ISD::MUL, MVT::v16i8, 12, { X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrr, + X86::VPMOVZXBWrr, X86::VPMOVZXBWrr, + X86::VPMULLWrr, X86::VPMULLWrr, + X86::VPANDrr, X86::VPANDrr, X86::VPACKUSWBrr } }, + + { ISD::SHL, MVT::v4i64, 10, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPSLLQrr, X86::VPSHUFDri, + X86::VPSLLQrr, X86::VPSHUFDri, + X86::VPSLLQrr, X86::VPBLENDWrri, + X86::VPSLLQrr, X86::VPBLENDWrri, + X86::VINSERTF128rr } }, + { ISD::SHL, MVT::v8i32, 10, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VINSERTF128rr, + X86::VPSLLDrr, X86::VPADDDrr, + X86::VPSLLDrr, X86::VPADDDrr, + X86::VCVTTPS2DQrr, X86::VPMULLDrr, + X86::VCVTTPS2DQrr, X86::VPMULLDrr } }, + { ISD::SHL, MVT::v16i16, 30, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPSLLWrr, X86::VPSLLWrr, + X86::VPSLLWrr, X86::VPSLLWrr, + X86::VPSLLWrr, X86::VPSLLWrr, + X86::VPSLLWrr, X86::VPSLLWrr, + X86::VPSLLWrr, X86::VPSLLWrr, + X86::VPSLLWrr, X86::VPSLLWrr, + X86::VPADDWrr, X86::VPADDWrr, X86::VPADDWrr, + X86::VPADDWrr, X86::VPADDWrr, X86::VPADDWrr, + X86::VPORrr, X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VPORrr, X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VINSERTF128rr } }, + { ISD::SHL, MVT::v32i8, 24, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPSLLWrr, X86::VPSLLWrr, X86::VPSLLWrr, + X86::VPSLLWrr, X86::VPSLLWrr, X86::VPSLLWrr, + X86::VPANDrr, X86::VPANDrr, + X86::VPANDrr, X86::VPANDrr, + X86::VPADDBrr, X86::VPADDBrr, 
X86::VPADDBrr, + X86::VPADDBrr, X86::VPADDBrr, X86::VPADDBrr, + X86::VPBLENDVBrr, X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VPBLENDVBrr, X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VINSERTF128rr } }, + { ISD::SHL, MVT::v2i64, 4, { X86::VPSLLQrr, X86::VPSHUFDri, + X86::VPSLLQrr, X86::VPBLENDWrri } }, + { ISD::SHL, MVT::v4i32, 4, { X86::VPSLLDrr, X86::VPADDDrr, + X86::VCVTTPS2DQrr, X86::VPMULLDrr } }, + { ISD::SHL, MVT::v8i16, 14, { X86::VPSLLWrr, X86::VPSLLWrr, + X86::VPSLLWrr, X86::VPSLLWrr, + X86::VPSLLWrr, X86::VPSLLWrr, + X86::VPADDWrr, X86::VPADDWrr, X86::VPADDWrr, + X86::VPORrr, X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VPBLENDVBrr, X86::VPBLENDVBrr } }, + { ISD::SHL, MVT::v16i8, 11, { X86::VPSLLWrr, X86::VPSLLWrr, X86::VPSLLWrr, + X86::VPANDrr, X86::VPANDrr, + X86::VPADDBrr, X86::VPADDBrr, X86::VPADDBrr, + X86::VPBLENDVBrr, X86::VPBLENDVBrr, X86::VPBLENDVBrr } }, + + { ISD::SRL, MVT::v4i64, 10, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPSRLQrr, X86::VPSHUFDri, + X86::VPSRLQrr, X86::VPSHUFDri, + X86::VPSRLQrr, X86::VPBLENDWrri, + X86::VPSRLQrr, X86::VPBLENDWrri, + X86::VINSERTF128rr } }, + { ISD::SRL, MVT::v8i32, 24, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPSRLDrr, X86::VPSRLDrr, + X86::VPSRLDrr, X86::VPSRLDrr, + X86::VPSRLDrr, X86::VPSRLDrr, + X86::VPSRLDrr, X86::VPSRLDrr, + X86::VPSRLDQri, X86::VPSRLQri, + X86::VPSRLDQri, X86::VPSRLQri, + X86::VPUNPCKHDQrr, X86::VPMOVZXDQrr, + X86::VPUNPCKHDQrr, X86::VPMOVZXDQrr, + X86::VPBLENDWrri, X86::VPBLENDWrri, X86::VPBLENDWrri, + X86::VPBLENDWrri, X86::VPBLENDWrri, X86::VPBLENDWrri, + X86::VINSERTF128rr } }, + { ISD::SRL, MVT::v16i16, 30, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VINSERTF128rr, + X86::VPSLLWrr, X86::VPSLLWrr, + X86::VPSLLWrr, X86::VPSLLWrr, + X86::VPSRLWrr, X86::VPSRLWrr, + X86::VPSRLWrr, X86::VPSRLWrr, + X86::VPSRLWrr, X86::VPSRLWrr, + X86::VPSRLWrr, X86::VPSRLWrr, + X86::VPADDWrr, X86::VPADDWrr, X86::VPADDWrr, + X86::VPADDWrr, X86::VPADDWrr, X86::VPADDWrr, + 
X86::VPORrr, X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VPORrr, X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VPBLENDVBrr, X86::VPBLENDVBrr } }, + { ISD::SRL, MVT::v32i8, 26, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPSRLWrr, X86::VPSRLWrr, X86::VPSRLWrr, + X86::VPSRLWrr, X86::VPSRLWrr, X86::VPSRLWrr, + X86::VPANDrr, X86::VPANDrr, X86::VPANDrr, + X86::VPANDrr, X86::VPANDrr, X86::VPANDrr, + X86::VPSLLWrr, X86::VPADDBrr, X86::VPADDBrr, + X86::VPSLLWrr, X86::VPADDBrr, X86::VPADDBrr, + X86::VPBLENDVBrr, X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VPBLENDVBrr, X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VINSERTF128rr } }, + { ISD::SRL, MVT::v2i64, 4, { X86::VPSRLQrr, X86::VPSHUFDri, + X86::VPSRLQrr, X86::VPBLENDWrri } }, + { ISD::SRL, MVT::v4i32, 11, { X86::VPSRLDrr, X86::VPSRLDrr, + X86::VPSRLDrr, X86::VPSRLDrr, + X86::VPSRLDQri, X86::VPSRLQri, + X86::VPUNPCKHDQrr, X86::VPMOVZXDQrr, + X86::VPBLENDWrri, X86::VPBLENDWrri, X86::VPBLENDWrri } }, + { ISD::SRL, MVT::v8i16, 14, { X86::VPSLLWrr, X86::VPSLLWrr, + X86::VPSRLWrr, X86::VPSRLWrr, + X86::VPSRLWrr, X86::VPSRLWrr, + X86::VPADDWrr, X86::VPADDWrr, X86::VPADDWrr, + X86::VPORrr, X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VPBLENDVBrr, X86::VPBLENDVBrr } }, + { ISD::SRL, MVT::v16i8, 12, { X86::VPSRLWrr, X86::VPSRLWrr, X86::VPSRLWrr, + X86::VPANDrr, X86::VPANDrr, X86::VPANDrr, + X86::VPSLLWrr, X86::VPADDBrr, X86::VPADDBrr, + X86::VPBLENDVBrr, X86::VPBLENDVBrr, X86::VPBLENDVBrr } }, + + { ISD::SRA, MVT::v4i64, 26, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPSRLQrr, X86::VPSRLQrr, + X86::VPSRLQrr, X86::VPSRLQrr, + X86::VPSRLQrr, X86::VPSRLQrr, + X86::VPSRLQrr, X86::VPSRLQrr, + X86::VPSHUFDri, X86::VPBLENDWrri, X86::VPBLENDWrri, + X86::VPSHUFDri, X86::VPBLENDWrri, X86::VPBLENDWrri, + X86::VPORrr, X86::VPSUBQrr, + X86::VPORrr, X86::VPSUBQrr, + X86::VINSERTF128rr } }, + { ISD::SRA, MVT::v8i32, 26, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPSRADrr, X86::VPSRADrr, + 
X86::VPSRADrr, X86::VPSRADrr, + X86::VPSRADrr, X86::VPSRADrr, + X86::VPSRADrr, X86::VPSRADrr, + X86::VPSRLDQri, X86::VPSRLQri, + X86::VPSRLDQri, X86::VPSRLQri, + X86::VPUNPCKHDQrr, X86::VPMOVZXDQrr, + X86::VPUNPCKHDQrr, X86::VPMOVZXDQrr, + X86::VPBLENDWrri, X86::VPBLENDWrri, X86::VPBLENDWrri, + X86::VPBLENDWrri, X86::VPBLENDWrri, X86::VPBLENDWrri, + X86::VINSERTF128rr } }, + { ISD::SRA, MVT::v16i16, 30, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPSLLWrr, X86::VPSLLWrr, + X86::VPSLLWrr, X86::VPSLLWrr, + X86::VPSRAWrr, X86::VPSRAWrr, + X86::VPSRAWrr, X86::VPSRAWrr, + X86::VPSRAWrr, X86::VPSRAWrr, + X86::VPSRAWrr, X86::VPSRAWrr, + X86::VPADDWrr, X86::VPADDWrr, X86::VPADDWrr, + X86::VPADDWrr, X86::VPADDWrr, X86::VPADDWrr, + X86::VPORrr, X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VPORrr, X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VINSERTF128rr } }, + { ISD::SRA, MVT::v32i8, 50, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPSLLWrr, + X86::VPSLLWrr, + X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrr, + X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrr, + X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrr, + X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrr, + X86::VPSRAWrr, X86::VPSRAWrr, X86::VPSRAWrr, + X86::VPSRAWrr, X86::VPSRAWrr, X86::VPSRAWrr, + X86::VPSRAWrr, X86::VPSRAWrr, X86::VPSRAWrr, + X86::VPSRAWrr, X86::VPSRAWrr, X86::VPSRAWrr, + X86::VPBLENDVBrr, X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VPBLENDVBrr, X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VPBLENDVBrr, X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VPBLENDVBrr, X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VPADDWrr, X86::VPADDWrr, X86::VPSRLWrr, + X86::VPADDWrr, X86::VPADDWrr, X86::VPSRLWrr, + X86::VPADDWrr, X86::VPADDWrr, X86::VPSRLWrr, + X86::VPADDWrr, X86::VPADDWrr, X86::VPSRLWrr, + X86::VPACKUSWBrr, + X86::VPACKUSWBrr, + X86::VINSERTF128rr } }, + { ISD::SRA, MVT::v2i64, 12, { X86::VPSRLQrr, X86::VPSRLQrr, + X86::VPSRLQrr, X86::VPSRLQrr, + X86::VPSHUFDri, 
X86::VPBLENDWrri, X86::VPBLENDWrri, + X86::VPORrr, X86::VPSUBQrr } }, + { ISD::SRA, MVT::v4i32, 12, { X86::VPSRADrr, X86::VPSRADrr, + X86::VPSRADrr, X86::VPSRADrr, + X86::VPSRLDQri, X86::VPSRLQri, + X86::VPUNPCKHDQrr, X86::VPMOVZXDQrr, + X86::VPBLENDWrri, X86::VPBLENDWrri, X86::VPBLENDWrri } }, + { ISD::SRA, MVT::v8i16, 14, { X86::VPSLLWrr, X86::VPSLLWrr, + X86::VPSRAWrr, X86::VPSRAWrr, + X86::VPSRAWrr, X86::VPSRAWrr, + X86::VPADDWrr, X86::VPADDWrr, X86::VPADDWrr, + X86::VPORrr, X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VPBLENDVBrr, X86::VPBLENDVBrr } }, + { ISD::SRA, MVT::v16i8, 24, { X86::VPSLLWrr, + X86::VPUNPCKHBWrr, X86::VPUNPCKHBWrr, + X86::VPUNPCKLBWrr, X86::VPUNPCKLBWrr, + X86::VPSRAWrr, X86::VPSRAWrr, X86::VPSRAWrr, + X86::VPSRAWrr, X86::VPSRAWrr, X86::VPSRAWrr, + X86::VPBLENDVBrr, X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VPBLENDVBrr, X86::VPBLENDVBrr, X86::VPBLENDVBrr, + X86::VPADDWrr, X86::VPADDWrr, X86::VPSRLWrr, + X86::VPADDWrr, X86::VPADDWrr, X86::VPSRLWrr, + X86::VPACKUSWBrr } }, + + { ISD::AND, MVT::v4i64, 1, { X86::VANDPSYrr } }, + { ISD::AND, MVT::v8i32, 1, { X86::VANDPSYrr } }, + { ISD::AND, MVT::v16i16, 1, { X86::VANDPSYrr } }, + { ISD::AND, MVT::v32i8, 1, { X86::VANDPSYrr } }, + { ISD::AND, MVT::v2i64, 1, { X86::VPANDrr } }, + { ISD::AND, MVT::v4i32, 1, { X86::VPANDrr } }, + { ISD::AND, MVT::v8i16, 1, { X86::VPANDrr } }, + { ISD::AND, MVT::v16i8, 1, { X86::VPANDrr } }, + + { ISD::OR, MVT::v4i64, 1, { X86::VORPSYrr } }, + { ISD::OR, MVT::v8i32, 1, { X86::VORPSYrr } }, + { ISD::OR, MVT::v16i16, 1, { X86::VORPSYrr } }, + { ISD::OR, MVT::v32i8, 1, { X86::VORPSYrr } }, + { ISD::OR, MVT::v2i64, 1, { X86::VPORrr } }, + { ISD::OR, MVT::v4i32, 1, { X86::VPORrr } }, + { ISD::OR, MVT::v8i16, 1, { X86::VPORrr } }, + { ISD::OR, MVT::v16i8, 1, { X86::VPORrr } }, + + { ISD::XOR, MVT::v4i64, 1, { X86::VXORPSYrr } }, + { ISD::XOR, MVT::v8i32, 1, { X86::VXORPSYrr } }, + { ISD::XOR, MVT::v16i16, 1, { X86::VXORPSYrr } }, + { ISD::XOR, MVT::v32i8, 1, { 
X86::VXORPSYrr } }, + { ISD::XOR, MVT::v2i64, 1, { X86::VPXORrr } }, + { ISD::XOR, MVT::v4i32, 1, { X86::VPXORrr } }, + { ISD::XOR, MVT::v8i16, 1, { X86::VPXORrr } }, + { ISD::XOR, MVT::v16i8, 1, { X86::VPXORrr } }, }; if (ST->hasAVX()) - if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second)) - return LT.first * Entry->Cost; + if (const auto *Entry = SchedCostTableLookup(AVX1CostTable, ISD, LT.second)) + return LT.first * getSchedModelCost(Entry, ST); static const CostTblEntry SSE42CostTable[] = { { ISD::FADD, MVT::f64, 1 }, // Nehalem from http://www.agner.org/ @@ -714,25 +1044,16 @@ static const CostTblEntry SSE41CostTable[] = { { ISD::SHL, MVT::v16i8, 11 }, // pblendvb sequence. - { ISD::SHL, MVT::v32i8, 2*11+2 }, // pblendvb sequence + split. { ISD::SHL, MVT::v8i16, 14 }, // pblendvb sequence. - { ISD::SHL, MVT::v16i16, 2*14+2 }, // pblendvb sequence + split. { ISD::SHL, MVT::v4i32, 4 }, // pslld/paddd/cvttps2dq/pmulld - { ISD::SHL, MVT::v8i32, 2*4+2 }, // pslld/paddd/cvttps2dq/pmulld + split { ISD::SRL, MVT::v16i8, 12 }, // pblendvb sequence. - { ISD::SRL, MVT::v32i8, 2*12+2 }, // pblendvb sequence + split. { ISD::SRL, MVT::v8i16, 14 }, // pblendvb sequence. - { ISD::SRL, MVT::v16i16, 2*14+2 }, // pblendvb sequence + split. { ISD::SRL, MVT::v4i32, 11 }, // Shift each lane + blend. - { ISD::SRL, MVT::v8i32, 2*11+2 }, // Shift each lane + blend + split. { ISD::SRA, MVT::v16i8, 24 }, // pblendvb sequence. - { ISD::SRA, MVT::v32i8, 2*24+2 }, // pblendvb sequence + split. { ISD::SRA, MVT::v8i16, 14 }, // pblendvb sequence. - { ISD::SRA, MVT::v16i16, 2*14+2 }, // pblendvb sequence + split. { ISD::SRA, MVT::v4i32, 12 }, // Shift each lane + blend. - { ISD::SRA, MVT::v8i32, 2*12+2 }, // Shift each lane + blend + split. { ISD::MUL, MVT::v4i32, 2 } // pmulld (Nehalem from agner.org) }; @@ -748,19 +1069,16 @@ { ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence. { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul. 
{ ISD::SHL, MVT::v2i64, 4 }, // splat+shuffle sequence. - { ISD::SHL, MVT::v4i64, 2*4+2 }, // splat+shuffle sequence + split. { ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence. { ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence. { ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend. { ISD::SRL, MVT::v2i64, 4 }, // splat+shuffle sequence. - { ISD::SRL, MVT::v4i64, 2*4+2 }, // splat+shuffle sequence + split. { ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence. { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence. { ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend. { ISD::SRA, MVT::v2i64, 12 }, // srl/xor/sub sequence. - { ISD::SRA, MVT::v4i64, 2*12+2 }, // srl/xor/sub sequence+split. { ISD::MUL, MVT::v16i8, 12 }, // extend/pmullw/trunc sequence. { ISD::MUL, MVT::v8i16, 1 }, // pmullw @@ -786,6 +1104,81 @@ if (const auto *Entry = CostTableLookup(SSE1CostTable, ISD, LT.second)) return LT.first * Entry->Cost; + static const SchedCostTblEntry X64CostTable[] = { + { ISD::ADD, MVT::i64, 1, { X86::ADD64rr } }, + { ISD::SUB, MVT::i64, 1, { X86::SUB64rr } }, + { ISD::MUL, MVT::i64, 1, { X86::MUL64r } }, + + { ISD::SHL, MVT::i64, 1, { X86::SHL64rCL } }, + { ISD::SRL, MVT::i64, 1, { X86::SHR64rCL } }, + { ISD::SRA, MVT::i64, 1, { X86::SAR64rCL } }, + + { ISD::AND, MVT::i64, 1, { X86::AND64rr } }, + { ISD::OR, MVT::i64, 1, { X86::OR64rr } }, + { ISD::XOR, MVT::i64, 1, { X86::XOR64rr } }, + + // TODO - move to cost=20 and remove the vector x20 factor? + { ISD::SDIV, MVT::i64, 1, { X86::IDIV64r } }, + { ISD::SREM, MVT::i64, 1, { X86::IDIV64r } }, + { ISD::UDIV, MVT::i64, 1, { X86::DIV64r } }, + { ISD::UREM, MVT::i64, 1, { X86::DIV64r } }, + }; + + if (ST->is64Bit()) + if (const auto *Entry = SchedCostTableLookup(X64CostTable, ISD, LT.second)) + return LT.first * getSchedModelCost(Entry, ST); + + static const SchedCostTblEntry X86CostTable[] = { + // TODO: Add i64 math on 32-bit platforms. 
+ { ISD::ADD, MVT::i32, 1, { X86::ADD32rr } }, + { ISD::ADD, MVT::i16, 1, { X86::ADD16rr } }, + { ISD::ADD, MVT::i8, 1, { X86::ADD8rr } }, + { ISD::SUB, MVT::i32, 1, { X86::SUB32rr } }, + { ISD::SUB, MVT::i16, 1, { X86::SUB16rr } }, + { ISD::SUB, MVT::i8, 1, { X86::SUB8rr } }, + { ISD::MUL, MVT::i32, 1, { X86::MUL32r } }, + { ISD::MUL, MVT::i16, 1, { X86::MUL16r } }, + { ISD::MUL, MVT::i8, 1, { X86::MUL8r } }, + + { ISD::SHL, MVT::i32, 1, { X86::SHL32rCL } }, + { ISD::SHL, MVT::i16, 1, { X86::SHL16rCL } }, + { ISD::SHL, MVT::i8, 1, { X86::SHL8rCL } }, + { ISD::SRL, MVT::i32, 1, { X86::SHR32rCL } }, + { ISD::SRL, MVT::i16, 1, { X86::SHR16rCL } }, + { ISD::SRL, MVT::i8, 1, { X86::SHR8rCL } }, + { ISD::SRA, MVT::i32, 1, { X86::SAR32rCL } }, + { ISD::SRA, MVT::i16, 1, { X86::SAR16rCL } }, + { ISD::SRA, MVT::i8, 1, { X86::SAR8rCL } }, + + { ISD::AND, MVT::i32, 1, { X86::AND32rr } }, + { ISD::AND, MVT::i16, 1, { X86::AND16rr } }, + { ISD::AND, MVT::i8, 1, { X86::AND8rr } }, + { ISD::OR, MVT::i32, 1, { X86::OR32rr } }, + { ISD::OR, MVT::i16, 1, { X86::OR16rr } }, + { ISD::OR, MVT::i8, 1, { X86::OR8rr } }, + { ISD::XOR, MVT::i32, 1, { X86::XOR32rr } }, + { ISD::XOR, MVT::i16, 1, { X86::XOR16rr } }, + { ISD::XOR, MVT::i8, 1, { X86::XOR8rr } }, + + // TODO - move to cost=20 and remove the vector x20 factor? 
+ { ISD::SDIV, MVT::i32, 1, { X86::IDIV32r } }, + { ISD::SREM, MVT::i32, 1, { X86::IDIV32r } }, + { ISD::SDIV, MVT::i16, 1, { X86::IDIV16r } }, + { ISD::SREM, MVT::i16, 1, { X86::IDIV16r } }, + { ISD::SDIV, MVT::i8, 1, { X86::IDIV8r } }, + { ISD::SREM, MVT::i8, 1, { X86::IDIV8r } }, + + { ISD::UDIV, MVT::i32, 1, { X86::DIV32r } }, + { ISD::UREM, MVT::i32, 1, { X86::DIV32r } }, + { ISD::UDIV, MVT::i16, 1, { X86::DIV16r } }, + { ISD::UREM, MVT::i16, 1, { X86::DIV16r } }, + { ISD::UDIV, MVT::i8, 1, { X86::DIV8r } }, + { ISD::UREM, MVT::i8, 1, { X86::DIV8r } }, + }; + + if (const auto *Entry = SchedCostTableLookup(X86CostTable, ISD, LT.second)) + return LT.first * getSchedModelCost(Entry, ST); + // It is not a good idea to vectorize division. We have to scalarize it and // in the process we will often end up having to spilling regular // registers. The overhead of division is going to dominate most kernels @@ -1002,52 +1395,129 @@ if (const auto *Entry = CostTableLookup(XOPShuffleTbl, Kind, LT.second)) return LT.first * Entry->Cost; - static const CostTblEntry AVX1ShuffleTbl[] = { - { TTI::SK_Broadcast, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd - { TTI::SK_Broadcast, MVT::v8f32, 2 }, // vperm2f128 + vpermilps - { TTI::SK_Broadcast, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd - { TTI::SK_Broadcast, MVT::v8i32, 2 }, // vperm2f128 + vpermilps - { TTI::SK_Broadcast, MVT::v16i16, 3 }, // vpshuflw + vpshufd + vinsertf128 - { TTI::SK_Broadcast, MVT::v32i8, 2 }, // vpshufb + vinsertf128 - - { TTI::SK_Reverse, MVT::v4f64, 2 }, // vperm2f128 + vpermilpd - { TTI::SK_Reverse, MVT::v8f32, 2 }, // vperm2f128 + vpermilps - { TTI::SK_Reverse, MVT::v4i64, 2 }, // vperm2f128 + vpermilpd - { TTI::SK_Reverse, MVT::v8i32, 2 }, // vperm2f128 + vpermilps - { TTI::SK_Reverse, MVT::v16i16, 4 }, // vextractf128 + 2*pshufb - // + vinsertf128 - { TTI::SK_Reverse, MVT::v32i8, 4 }, // vextractf128 + 2*pshufb - // + vinsertf128 - - { TTI::SK_Select, MVT::v4i64, 1 }, // vblendpd - { TTI::SK_Select, 
MVT::v4f64, 1 }, // vblendpd - { TTI::SK_Select, MVT::v8i32, 1 }, // vblendps - { TTI::SK_Select, MVT::v8f32, 1 }, // vblendps - { TTI::SK_Select, MVT::v16i16, 3 }, // vpand + vpandn + vpor - { TTI::SK_Select, MVT::v32i8, 3 }, // vpand + vpandn + vpor - - { TTI::SK_PermuteSingleSrc, MVT::v4f64, 2 }, // vperm2f128 + vshufpd - { TTI::SK_PermuteSingleSrc, MVT::v4i64, 2 }, // vperm2f128 + vshufpd - { TTI::SK_PermuteSingleSrc, MVT::v8f32, 4 }, // 2*vperm2f128 + 2*vshufps - { TTI::SK_PermuteSingleSrc, MVT::v8i32, 4 }, // 2*vperm2f128 + 2*vshufps - { TTI::SK_PermuteSingleSrc, MVT::v16i16, 8 }, // vextractf128 + 4*pshufb - // + 2*por + vinsertf128 - { TTI::SK_PermuteSingleSrc, MVT::v32i8, 8 }, // vextractf128 + 4*pshufb - // + 2*por + vinsertf128 - - { TTI::SK_PermuteTwoSrc, MVT::v4f64, 3 }, // 2*vperm2f128 + vshufpd - { TTI::SK_PermuteTwoSrc, MVT::v4i64, 3 }, // 2*vperm2f128 + vshufpd - { TTI::SK_PermuteTwoSrc, MVT::v8f32, 4 }, // 2*vperm2f128 + 2*vshufps - { TTI::SK_PermuteTwoSrc, MVT::v8i32, 4 }, // 2*vperm2f128 + 2*vshufps - { TTI::SK_PermuteTwoSrc, MVT::v16i16, 15 }, // 2*vextractf128 + 8*pshufb - // + 4*por + vinsertf128 - { TTI::SK_PermuteTwoSrc, MVT::v32i8, 15 }, // 2*vextractf128 + 8*pshufb - // + 4*por + vinsertf128 - }; + static const SchedCostTblEntry AVX1ShuffleTbl[] = { + { TTI::SK_Broadcast, MVT::v4f64, 2, { X86::VPERMILPDri, + X86::VINSERTF128rr } }, + { TTI::SK_Broadcast, MVT::v8f32, 2, { X86::VPERMILPSri, + X86::VINSERTF128rr } }, + { TTI::SK_Broadcast, MVT::v4i64, 2, { X86::VPSHUFDri, + X86::VINSERTF128rr } }, + { TTI::SK_Broadcast, MVT::v8i32, 2, { X86::VPSHUFDri, + X86::VINSERTF128rr } }, + { TTI::SK_Broadcast, MVT::v16i16, 3, { X86::VPSHUFLWri, X86::VPSHUFDri, + X86::VINSERTF128rr } }, + { TTI::SK_Broadcast, MVT::v32i8, 2, { X86::VPSHUFBrr, + X86::VINSERTF128rr } }, + + { TTI::SK_Broadcast, MVT::v2f64, 1, { X86::VPERMILPDri } }, + { TTI::SK_Broadcast, MVT::v4f32, 1, { X86::VPERMILPSri } }, + { TTI::SK_Broadcast, MVT::v2i64, 1, { X86::VPSHUFDri } }, + 
{ TTI::SK_Broadcast, MVT::v4i32, 1, { X86::VPSHUFDri } }, + { TTI::SK_Broadcast, MVT::v8i16, 1, { X86::VPSHUFLWri, X86::VPSHUFDri } }, + { TTI::SK_Broadcast, MVT::v16i8, 1, { X86::VPSHUFBrr } }, + + { TTI::SK_Reverse, MVT::v4f64, 2, { X86::VPERMILPDYri, + X86::VPERM2F128rr } }, + { TTI::SK_Reverse, MVT::v8f32, 2, { X86::VPERMILPSYri, + X86::VPERM2F128rr } }, + { TTI::SK_Reverse, MVT::v4i64, 2, { X86::VPERMILPDYri, + X86::VPERM2F128rr } }, + { TTI::SK_Reverse, MVT::v8i32, 2, { X86::VPERMILPSYri, + X86::VPERM2F128rr } }, + { TTI::SK_Reverse, MVT::v16i16, 4, { X86::VEXTRACTF128rr, + X86::VPSHUFBrr, X86::VPSHUFBrr, + X86::VINSERTF128rr } }, + { TTI::SK_Reverse, MVT::v32i8, 4, { X86::VEXTRACTF128rr, + X86::VPSHUFBrr, X86::VPSHUFBrr, + X86::VINSERTF128rr } }, + + { TTI::SK_Reverse, MVT::v2f64, 1, { X86::VSHUFPDrri } }, + { TTI::SK_Reverse, MVT::v4f32, 1, { X86::VSHUFPSrri } }, + { TTI::SK_Reverse, MVT::v2i64, 1, { X86::VPSHUFDri } }, + { TTI::SK_Reverse, MVT::v4i32, 1, { X86::VPSHUFDri } }, + { TTI::SK_Reverse, MVT::v8i16, 1, { X86::VPSHUFBrr } }, + { TTI::SK_Reverse, MVT::v16i8, 1, { X86::VPSHUFBrr } }, + + { TTI::SK_Select, MVT::v4i64, 1, { X86::VBLENDPDYrri } }, + { TTI::SK_Select, MVT::v4f64, 1, { X86::VBLENDPDYrri } }, + { TTI::SK_Select, MVT::v8i32, 1, { X86::VBLENDPSYrri } }, + { TTI::SK_Select, MVT::v8f32, 1, { X86::VBLENDPSYrri } }, + { TTI::SK_Select, MVT::v16i16, 3, { X86::VANDPSYrr, X86::VANDNPSYrr, + X86::VORPSYrr } }, + { TTI::SK_Select, MVT::v32i8, 3, { X86::VANDPSYrr, X86::VANDNPSYrr, + X86::VORPSYrr } }, + + { TTI::SK_Select, MVT::v2f64, 1, { X86::VBLENDPDrri } }, + { TTI::SK_Select, MVT::v4f32, 1, { X86::VBLENDPSrri } }, + { TTI::SK_Select, MVT::v2i64, 1, { X86::VPBLENDWrri } }, + { TTI::SK_Select, MVT::v4i32, 1, { X86::VPBLENDWrri } }, + { TTI::SK_Select, MVT::v8i16, 1, { X86::VPBLENDWrri } }, + { TTI::SK_Select, MVT::v16i8, 1, { X86::VPBLENDVBrr } }, + + { TTI::SK_PermuteSingleSrc, MVT::v4f64, 2, + { X86::VPERM2F128rr, X86::VPERM2F128rr, 
X86::VSHUFPDYrri } }, + { TTI::SK_PermuteSingleSrc, MVT::v4i64, 2, + { X86::VPERM2F128rr, X86::VPERM2F128rr, X86::VSHUFPDYrri } }, + { TTI::SK_PermuteSingleSrc, MVT::v8f32, 4, + { X86::VPERM2F128rr, X86::VPERM2F128rr, + X86::VSHUFPSYrri, X86::VSHUFPSYrri } }, + { TTI::SK_PermuteSingleSrc, MVT::v8i32, 4, + { X86::VPERM2F128rr, X86::VPERM2F128rr, + X86::VSHUFPSYrri, X86::VSHUFPSYrri } }, + { TTI::SK_PermuteSingleSrc, MVT::v16i16, 8, + { X86::VEXTRACTF128rr, X86::VINSERTF128rr, + X86::VPSHUFBrr, X86::VPSHUFBrr, X86::VORPSYrr, + X86::VPSHUFBrr, X86::VPSHUFBrr, X86::VORPSYrr } }, + { TTI::SK_PermuteSingleSrc, MVT::v32i8, 8, + { X86::VEXTRACTF128rr, X86::VINSERTF128rr, + X86::VPSHUFBrr, X86::VPSHUFBrr, X86::VPORrr, + X86::VPSHUFBrr, X86::VPSHUFBrr, X86::VPORrr } }, + + { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1, { X86::VPERMILPDri } }, + { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1, { X86::VPSHUFDri } }, + { TTI::SK_PermuteSingleSrc, MVT::v4f32, 1, { X86::VPERMILPSri } }, + { TTI::SK_PermuteSingleSrc, MVT::v4i32, 1, { X86::VPSHUFDri } }, + { TTI::SK_PermuteSingleSrc, MVT::v8i16, 1, { X86::VPSHUFBrr } }, + { TTI::SK_PermuteSingleSrc, MVT::v16i8, 1, { X86::VPSHUFBrr } }, + + { TTI::SK_PermuteTwoSrc, MVT::v4f64, 3, + { X86::VPERM2F128rr, X86::VPERM2F128rr, + X86::VSHUFPDYrri, X86::VSHUFPDYrri } }, + { TTI::SK_PermuteTwoSrc, MVT::v4i64, 3, + { X86::VPERM2F128rr, X86::VPERM2F128rr, + X86::VSHUFPDYrri, X86::VSHUFPDYrri } }, + { TTI::SK_PermuteTwoSrc, MVT::v8f32, 4, + { X86::VPERM2F128rr, X86::VPERM2F128rr, + X86::VSHUFPSYrri, X86::VSHUFPSYrri } }, + { TTI::SK_PermuteTwoSrc, MVT::v8i32, 4, + { X86::VPERM2F128rr, X86::VPERM2F128rr, + X86::VSHUFPSYrri, X86::VSHUFPSYrri } }, + { TTI::SK_PermuteTwoSrc, MVT::v16i16, 15, + { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VINSERTF128rr, + X86::VPSHUFBrr, X86::VPSHUFBrr, X86::VPSHUFBrr, X86::VPSHUFBrr, + X86::VPSHUFBrr, X86::VPSHUFBrr, X86::VPSHUFBrr, X86::VPSHUFBrr, + X86::VPORrr, X86::VPORrr, X86::VPORrr, X86::VPORrr } }, + { 
TTI::SK_PermuteTwoSrc, MVT::v32i8, 15, + { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, X86::VINSERTF128rr, + X86::VPSHUFBrr, X86::VPSHUFBrr, X86::VPSHUFBrr, X86::VPSHUFBrr, + X86::VPSHUFBrr, X86::VPSHUFBrr, X86::VPSHUFBrr, X86::VPSHUFBrr, + X86::VPORrr, X86::VPORrr, X86::VPORrr, X86::VPORrr } }, + + { TTI::SK_PermuteTwoSrc, MVT::v2f64, 1, { X86::SHUFPDrri } }, + { TTI::SK_PermuteTwoSrc, MVT::v2i64, 1, { X86::SHUFPDrri } }, + { TTI::SK_PermuteTwoSrc, MVT::v4f32, 2, { X86::SHUFPSrri, + X86::SHUFPSrri } }, + { TTI::SK_PermuteTwoSrc, MVT::v4i32, 2, { X86::SHUFPSrri, + X86::SHUFPSrri } }, + { TTI::SK_PermuteTwoSrc, MVT::v8i16, 3, { X86::VPSHUFBrr, X86::VPSHUFBrr, + X86::VPORrr } }, + { TTI::SK_PermuteTwoSrc, MVT::v16i8, 3, { X86::VPSHUFBrr, X86::VPSHUFBrr, + X86::VPORrr } }, + }; if (ST->hasAVX()) - if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second)) - return LT.first * Entry->Cost; + if (const auto *Entry = SchedCostTableLookup(AVX1ShuffleTbl, Kind, LT.second)) + return LT.first * getSchedModelCost(Entry, ST); static const CostTblEntry SSE41ShuffleTbl[] = { { TTI::SK_Select, MVT::v2i64, 1 }, // pblendw @@ -1532,14 +2002,28 @@ { ISD::SETCC, MVT::v2i64, 1 }, }; - static const CostTblEntry AVX1CostTbl[] = { - { ISD::SETCC, MVT::v4f64, 1 }, - { ISD::SETCC, MVT::v8f32, 1 }, - // AVX1 does not support 8-wide integer compare. 
- { ISD::SETCC, MVT::v4i64, 4 }, - { ISD::SETCC, MVT::v8i32, 4 }, - { ISD::SETCC, MVT::v16i16, 4 }, - { ISD::SETCC, MVT::v32i8, 4 }, + static const SchedCostTblEntry AVX1CostTbl[] = { + { ISD::SETCC, MVT::v4f64, 1, { X86::VCMPPDYrri } }, + { ISD::SETCC, MVT::v8f32, 1, { X86::VCMPPSYrri } }, + { ISD::SETCC, MVT::v4i64, 4, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPCMPGTQrr, X86::VPCMPGTQrr, + X86::VINSERTF128rr } }, + { ISD::SETCC, MVT::v8i32, 4, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPCMPGTDrr, X86::VPCMPGTDrr, + X86::VINSERTF128rr } }, + { ISD::SETCC, MVT::v16i16, 4, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPCMPGTWrr, X86::VPCMPGTWrr, + X86::VINSERTF128rr } }, + { ISD::SETCC, MVT::v32i8, 4, { X86::VEXTRACTF128rr, X86::VEXTRACTF128rr, + X86::VPCMPGTBrr, X86::VPCMPGTBrr, + X86::VINSERTF128rr } }, + + { ISD::SETCC, MVT::v2f64, 1, { X86::VCMPPDrri } }, + { ISD::SETCC, MVT::v4f32, 1, { X86::VCMPPSrri } }, + { ISD::SETCC, MVT::v2i64, 1, { X86::VPCMPGTQrr } }, + { ISD::SETCC, MVT::v4i32, 1, { X86::VPCMPGTDrr } }, + { ISD::SETCC, MVT::v8i16, 1, { X86::VPCMPGTWrr } }, + { ISD::SETCC, MVT::v16i8, 1, { X86::VPCMPGTBrr } }, }; static const CostTblEntry AVX2CostTbl[] = { @@ -1574,8 +2058,8 @@ return LT.first * Entry->Cost; if (ST->hasAVX()) - if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy)) - return LT.first * Entry->Cost; + if (const auto *Entry = SchedCostTableLookup(AVX1CostTbl, ISD, MTy)) + return LT.first * getSchedModelCost(Entry, ST); if (ST->hasSSE42()) if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy)) @@ -1702,12 +2186,15 @@ { ISD::CTTZ, MVT::v8i32, 30 }, // 2 x 128-bit Op + extract/insert { ISD::CTTZ, MVT::v16i16, 26 }, // 2 x 128-bit Op + extract/insert { ISD::CTTZ, MVT::v32i8, 20 }, // 2 x 128-bit Op + extract/insert - { ISD::FSQRT, MVT::f32, 14 }, // SNB from http://www.agner.org/ - { ISD::FSQRT, MVT::v4f32, 14 }, // SNB from http://www.agner.org/ - { ISD::FSQRT, MVT::v8f32, 28 }, // SNB from 
http://www.agner.org/ - { ISD::FSQRT, MVT::f64, 21 }, // SNB from http://www.agner.org/ - { ISD::FSQRT, MVT::v2f64, 21 }, // SNB from http://www.agner.org/ - { ISD::FSQRT, MVT::v4f64, 43 }, // SNB from http://www.agner.org/ + }; + // SNB from http://www.agner.org/ + static const SchedCostTblEntry AVX1SchedCostTbl[] = { + { ISD::FSQRT, MVT::f32, 14, { X86::VSQRTSSr } }, + { ISD::FSQRT, MVT::v4f32, 14, { X86::VSQRTPSr } }, + { ISD::FSQRT, MVT::v8f32, 28, { X86::VSQRTPSYr } }, + { ISD::FSQRT, MVT::f64, 21, { X86::VSQRTSDr } }, + { ISD::FSQRT, MVT::v2f64, 21, { X86::VSQRTPDr } }, + { ISD::FSQRT, MVT::v4f64, 43, { X86::VSQRTPDYr } }, }; static const CostTblEntry GLMCostTbl[] = { { ISD::FSQRT, MVT::f32, 19 }, // sqrtss @@ -1839,9 +2326,12 @@ if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy)) return LT.first * Entry->Cost; - if (ST->hasAVX()) + if (ST->hasAVX()) { if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy)) return LT.first * Entry->Cost; + if (const auto *Entry = SchedCostTableLookup(AVX1SchedCostTbl, ISD, MTy)) + return LT.first * getSchedModelCost(Entry, ST); + } if (ST->hasSSE42()) if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy)) Index: test/Analysis/CostModel/X86/alternate-shuffle-cost.ll =================================================================== --- test/Analysis/CostModel/X86/alternate-shuffle-cost.ll +++ test/Analysis/CostModel/X86/alternate-shuffle-cost.ll @@ -648,7 +648,7 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 ; ; BTVER2-LABEL: 'test_v16i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 ; %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -673,7 
+673,7 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 ; ; BTVER2-LABEL: 'test_v16i8_2' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 ; %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> @@ -698,7 +698,7 @@ ; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 ; ; BTVER2-LABEL: 'test_v16i8_3' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %1 ; %1 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> Index: test/Analysis/CostModel/X86/arith-fp.ll =================================================================== --- test/Analysis/CostModel/X86/arith-fp.ll +++ test/Analysis/CostModel/X86/arith-fp.ll @@ -300,10 +300,10 @@ ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fmul <4 x float> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fmul <8 x float> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fmul <16 x float> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F64 = fmul double undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fmul <2 x double> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = fmul <4 x double> undef, undef -; 
BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fmul <8 x double> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F64 = fmul double undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = fmul <2 x double> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = fmul <4 x double> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = fmul <8 x double> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %F32 = fmul float undef, undef @@ -398,14 +398,14 @@ ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'fdiv' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = fdiv float undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = fdiv <4 x float> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = fdiv <8 x float> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = fdiv <16 x float> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %F64 = fdiv double undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V2F64 = fdiv <2 x double> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4F64 = fdiv <4 x double> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8F64 = fdiv <8 x double> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %F32 = fdiv float undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4F32 = fdiv <4 x float> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 38 for 
instruction: %V8F32 = fdiv <8 x float> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V16F32 = fdiv <16 x float> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %F64 = fdiv double undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2F64 = fdiv <2 x double> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V4F64 = fdiv <4 x double> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V8F64 = fdiv <8 x double> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %F32 = fdiv float undef, undef @@ -580,14 +580,14 @@ ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'fsqrt' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) -; BTVER2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost 
of 21 for instruction: %F32 = call float @llvm.sqrt.f32(float undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4F32 = call <4 x float> @llvm.sqrt.v4f32(<4 x float> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8F32 = call <8 x float> @llvm.sqrt.v8f32(<8 x float> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V16F32 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %F64 = call double @llvm.sqrt.f64(double undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V2F64 = call <2 x double> @llvm.sqrt.v2f64(<2 x double> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V4F64 = call <4 x double> @llvm.sqrt.v4f64(<4 x double> undef) +; BTVER2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V8F64 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> undef) ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %F32 = call float @llvm.sqrt.f32(float undef) Index: test/Analysis/CostModel/X86/arith.ll =================================================================== --- test/Analysis/CostModel/X86/arith.ll +++ test/Analysis/CostModel/X86/arith.ll @@ -170,20 +170,20 @@ ; BTVER2-LABEL: 'add' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = add i64 undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = add <2 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = add <4 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = add <8 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = add <4 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for 
instruction: %V8I64 = add <8 x i64> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = add i32 undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = add <4 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = add <8 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = add <16 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = add <8 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = add <16 x i32> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = add i16 undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = add <8 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = add <16 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = add <32 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = add <16 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = add <32 x i16> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = add i8 undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = add <16 x i8> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = add <32 x i8> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = add <64 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = add <32 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = add <64 x i8> 
undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = add i64 undef, undef @@ -365,20 +365,20 @@ ; BTVER2-LABEL: 'sub' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sub i64 undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = sub <2 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = sub <4 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = sub <8 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = sub <4 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = sub <8 x i64> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sub i32 undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = sub <4 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = sub <8 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = sub <16 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = sub <8 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = sub <16 x i32> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sub i16 undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = sub <8 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = sub <16 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = sub <32 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = sub 
<16 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = sub <32 x i16> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sub i8 undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = sub <16 x i8> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = sub <32 x i8> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = sub <64 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = sub <32 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = sub <64 x i8> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = sub i64 undef, undef @@ -1105,22 +1105,22 @@ ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'mul' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = mul i64 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V2I64 = mul <2 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4I64 = mul <4 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8I64 = mul <8 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = mul i64 undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = mul <2 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = mul <4 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = mul <8 x i64> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = mul i32 undef, 
undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = mul <4 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = mul <8 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = mul <16 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V8I32 = mul <8 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16I32 = mul <16 x i32> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = mul i16 undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = mul <8 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = mul <16 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = mul <32 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = mul <16 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = mul <32 x i16> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = mul i8 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I8 = mul <16 x i8> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32I8 = mul <32 x i8> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64I8 = mul <64 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I8 = mul <16 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V32I8 = mul <32 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V64I8 = mul <64 x i8> undef, undef ; BTVER2-NEXT: 
Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = mul i64 undef, undef @@ -1179,7 +1179,7 @@ ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'mul_2i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %A0 = mul <2 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %A0 = mul <2 x i32> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %A0 = mul <2 x i32> undef, undef Index: test/Analysis/CostModel/X86/cmp.ll =================================================================== --- test/Analysis/CostModel/X86/cmp.ll +++ test/Analysis/CostModel/X86/cmp.ll @@ -94,12 +94,12 @@ ; BTVER2-LABEL: 'cmp_float' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = fcmp olt <2 x float> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = fcmp olt <4 x float> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = fcmp olt <8 x float> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = fcmp olt <16 x float> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = fcmp olt <8 x float> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = fcmp olt <16 x float> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = fcmp olt <2 x double> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = fcmp olt <4 x double> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F64 = fcmp olt <8 x double> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F64 = fcmp olt <16 x double> undef, undef +; BTVER2-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: %V4F64 = fcmp olt <4 x double> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = fcmp olt <8 x double> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16F64 = fcmp olt <16 x double> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V2F32 = fcmp olt <2 x float> undef, undef @@ -289,21 +289,21 @@ ; ; BTVER2-LABEL: 'cmp_int' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = icmp eq <16 x i8> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V32I8 = icmp eq <32 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V64I8 = icmp eq <64 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V128I8 = icmp eq <128 x i8> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = icmp eq <8 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V32I16 = icmp eq <32 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V16I16 = icmp eq <16 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V32I16 = icmp eq <32 x 
i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V64I16 = icmp eq <64 x i16> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = icmp eq <4 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8I32 = icmp eq <8 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16I32 = icmp eq <16 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V32I32 = icmp eq <32 x i32> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = icmp eq <2 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V4I64 = icmp eq <4 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8I64 = icmp eq <8 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16I64 = icmp eq <16 x i64> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %V16I8 = icmp eq <16 x i8> undef, undef Index: test/Analysis/CostModel/X86/div.ll =================================================================== --- 
test/Analysis/CostModel/X86/div.ll +++ test/Analysis/CostModel/X86/div.ll @@ -32,22 +32,22 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'sdiv' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i32 = sdiv <4 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i32 = sdiv <8 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i32 = sdiv <16 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i16 = sdiv <8 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i16 = sdiv <16 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i16 = sdiv <32 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = sdiv <16 x i8> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = sdiv <32 x i8> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = sdiv <64 x i8> undef, undef +; BTVER2-NEXT: Cost 
Model: Found an estimated cost of 41 for instruction: %I64 = sdiv i64 undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V2i64 = sdiv <2 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3280 for instruction: %V4i64 = sdiv <4 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6560 for instruction: %V8i64 = sdiv <8 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %I32 = sdiv i32 undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2000 for instruction: %V4i32 = sdiv <4 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4000 for instruction: %V8i32 = sdiv <8 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8000 for instruction: %V16i32 = sdiv <16 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %I16 = sdiv i16 undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V8i16 = sdiv <8 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5440 for instruction: %V16i16 = sdiv <16 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10880 for instruction: %V32i16 = sdiv <32 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I8 = sdiv i8 undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V16i8 = sdiv <16 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7680 for instruction: %V32i8 = sdiv <32 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 15360 for instruction: %V64i8 = sdiv <64 x i8> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, undef @@ -94,22 +94,22 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; 
BTVER2-LABEL: 'udiv' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i32 = udiv <4 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i32 = udiv <8 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i32 = udiv <16 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i16 = udiv <8 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i16 = udiv <16 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i16 = udiv <32 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = udiv <16 x i8> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = udiv <32 x i8> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = udiv <64 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %I64 = udiv i64 undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V2i64 = udiv <2 x i64> 
undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3280 for instruction: %V4i64 = udiv <4 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6560 for instruction: %V8i64 = udiv <8 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %I32 = udiv i32 undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2000 for instruction: %V4i32 = udiv <4 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4000 for instruction: %V8i32 = udiv <8 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8000 for instruction: %V16i32 = udiv <16 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %I16 = udiv i16 undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V8i16 = udiv <8 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5440 for instruction: %V16i16 = udiv <16 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10880 for instruction: %V32i16 = udiv <32 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I8 = udiv i8 undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V16i8 = udiv <16 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7680 for instruction: %V32i8 = udiv <32 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 15360 for instruction: %V64i8 = udiv <64 x i8> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = udiv i64 undef, undef @@ -156,22 +156,22 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'sdiv_const' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 
for instruction: %V2i64 = sdiv <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i32 = sdiv <4 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i32 = sdiv <8 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i32 = sdiv <16 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i16 = sdiv <8 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i16 = sdiv <16 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i16 = sdiv <32 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = sdiv <16 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = sdiv <32 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = sdiv <64 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %I64 = sdiv i64 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V2i64 = sdiv <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3280 for instruction: %V4i64 = sdiv <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6560 for instruction: %V8i64 = sdiv <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated 
cost of 25 for instruction: %I32 = sdiv i32 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2000 for instruction: %V4i32 = sdiv <4 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4000 for instruction: %V8i32 = sdiv <8 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8000 for instruction: %V16i32 = sdiv <16 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %I16 = sdiv i16 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V8i16 = sdiv <8 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5440 for instruction: %V16i16 = sdiv <16 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10880 for instruction: %V32i16 = sdiv <32 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I8 = sdiv i8 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V16i8 = sdiv <16 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7680 for instruction: %V32i8 = sdiv <32 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 15360 for instruction: %V64i8 = sdiv <64 x i8> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 7 @@ -218,22 +218,22 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'udiv_const' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 
7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i32 = udiv <4 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i32 = udiv <8 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i32 = udiv <16 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i16 = udiv <8 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i16 = udiv <16 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i16 = udiv <32 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = udiv <16 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = udiv <32 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = udiv <64 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %I64 = udiv i64 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V2i64 = udiv <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3280 for instruction: %V4i64 = udiv <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6560 for instruction: %V8i64 = udiv <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %I32 = udiv i32 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2000 for instruction: %V4i32 = udiv <4 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4000 for instruction: %V8i32 = udiv <8 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8000 for instruction: %V16i32 = udiv 
<16 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %I16 = udiv i16 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V8i16 = udiv <8 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5440 for instruction: %V16i16 = udiv <16 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10880 for instruction: %V32i16 = udiv <32 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I8 = udiv i8 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V16i8 = udiv <16 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7680 for instruction: %V32i8 = udiv <32 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 15360 for instruction: %V64i8 = udiv <64 x i8> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 7 @@ -413,22 +413,22 @@ ; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'sdiv_uniformconst' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sdiv i64 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = sdiv <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = sdiv <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = sdiv <8 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sdiv i32 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %I64 = sdiv i64 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V2i64 = sdiv <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3280 for instruction: %V4i64 = sdiv <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an 
estimated cost of 6560 for instruction: %V8i64 = sdiv <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %I32 = sdiv i32 undef, 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = sdiv <4 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = sdiv <8 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i32 = sdiv <16 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = sdiv i16 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %I16 = sdiv i16 undef, 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = sdiv <8 x i16> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = sdiv <16 x i16> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = sdiv <32 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = sdiv i8 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = sdiv <16 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = sdiv <32 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = sdiv <64 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I8 = sdiv i8 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V16i8 = sdiv <16 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7680 for instruction: %V32i8 = sdiv <32 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 15360 for instruction: %V64i8 = sdiv <64 x i8> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 7 @@ -570,22 +570,22 @@ ; SLM-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'udiv_uniformconst' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = udiv <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = udiv <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = udiv <8 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %I64 = udiv i64 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V2i64 = udiv <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3280 for instruction: %V4i64 = udiv <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6560 for instruction: %V8i64 = udiv <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %I32 = udiv i32 undef, 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4i32 = udiv <4 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V8i32 = udiv <8 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V16i32 = udiv <16 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %I16 = udiv i16 undef, 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = udiv <8 x i16> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16i16 = udiv <16 x i16> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V32i16 = udiv <32 x i16> undef, -; BTVER2-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = udiv <16 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = udiv <32 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = udiv <64 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I8 = udiv i8 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V16i8 = udiv <16 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7680 for instruction: %V32i8 = udiv <32 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 15360 for instruction: %V64i8 = udiv <64 x i8> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 7 @@ -766,21 +766,21 @@ ; ; BTVER2-LABEL: 'sdiv_constpow2' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V2i64 = sdiv <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V4i64 = sdiv <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: %V8i64 = sdiv <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V2i64 = sdiv <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V4i64 = sdiv <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: %V8i64 = sdiv <8 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V4i32 = sdiv <4 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for 
instruction: %V8i32 = sdiv <8 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V16i32 = sdiv <16 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V4i32 = sdiv <4 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8i32 = sdiv <8 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V16i32 = sdiv <16 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8i16 = sdiv <8 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V16i16 = sdiv <16 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V32i16 = sdiv <32 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8i16 = sdiv <8 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16i16 = sdiv <16 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32i16 = sdiv <32 x i16> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V16i8 = sdiv <16 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V32i8 = sdiv <32 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 260 for instruction: %V64i8 = sdiv <64 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V16i8 = sdiv <16 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V32i8 = sdiv <32 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V64i8 = sdiv <64 x i8> undef, ; BTVER2-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 16 @@ -961,21 +961,21 @@ ; ; BTVER2-LABEL: 'udiv_constpow2' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = udiv i64 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2i64 = udiv <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4i64 = udiv <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8i64 = udiv <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = udiv <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = udiv <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8i64 = udiv <8 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = udiv i32 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4i32 = udiv <4 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = udiv <8 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i32 = udiv <16 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V4i32 = udiv <4 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i32 = udiv <8 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i32 = udiv <16 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = udiv i16 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8i16 = udiv <8 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16i16 = udiv <16 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: 
%V32i16 = udiv <32 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V8i16 = udiv <8 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V16i16 = udiv <16 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V32i16 = udiv <32 x i16> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = udiv i8 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i8 = udiv <16 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V32i8 = udiv <32 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V64i8 = udiv <64 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V16i8 = udiv <16 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V32i8 = udiv <32 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V64i8 = udiv <64 x i8> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = udiv i64 undef, 16 @@ -1119,20 +1119,20 @@ ; BTVER2-LABEL: 'sdiv_uniformconstpow2' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I64 = sdiv i64 undef, 16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2i64 = sdiv <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V4i64 = sdiv <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V8i64 = sdiv <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %V4i64 = sdiv <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8i64 = sdiv <8 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I32 = sdiv i32 undef, 16 ; 
BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i32 = sdiv <4 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8i32 = sdiv <8 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16i32 = sdiv <16 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8i32 = sdiv <8 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16i32 = sdiv <16 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I16 = sdiv i16 undef, 16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i16 = sdiv <8 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i16 = sdiv <16 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V32i16 = sdiv <32 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V16i16 = sdiv <16 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i16 = sdiv <32 x i16> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %I8 = sdiv i8 undef, 16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V16i8 = sdiv <16 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V32i8 = sdiv <32 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V64i8 = sdiv <64 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: %V32i8 = sdiv <32 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V64i8 = sdiv <64 x i8> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = sdiv i64 undef, 16 Index: test/Analysis/CostModel/X86/rem.ll 
=================================================================== --- test/Analysis/CostModel/X86/rem.ll +++ test/Analysis/CostModel/X86/rem.ll @@ -32,22 +32,22 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'srem' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i32 = srem <4 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i32 = srem <8 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i32 = srem <16 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i16 = srem <8 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i16 = srem <16 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i16 = srem <32 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = srem <16 x i8> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = srem <32 x i8> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1280 for 
instruction: %V64i8 = srem <64 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %I64 = srem i64 undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V2i64 = srem <2 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3280 for instruction: %V4i64 = srem <4 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6560 for instruction: %V8i64 = srem <8 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %I32 = srem i32 undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2000 for instruction: %V4i32 = srem <4 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4000 for instruction: %V8i32 = srem <8 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8000 for instruction: %V16i32 = srem <16 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %I16 = srem i16 undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V8i16 = srem <8 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5440 for instruction: %V16i16 = srem <16 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10880 for instruction: %V32i16 = srem <32 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I8 = srem i8 undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V16i8 = srem <16 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7680 for instruction: %V32i8 = srem <32 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 15360 for instruction: %V64i8 = srem <64 x i8> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, undef @@ -94,22 +94,22 @@ ; CHECK-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'urem' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = urem <2 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = urem <4 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = urem <8 x i64> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i32 = urem <4 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i32 = urem <8 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i32 = urem <16 x i32> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i16 = urem <8 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i16 = urem <16 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i16 = urem <32 x i16> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = urem <16 x i8> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = urem <32 x i8> undef, undef -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = urem <64 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %I64 = urem i64 undef, undef +; BTVER2-NEXT: Cost Model: Found 
an estimated cost of 1640 for instruction: %V2i64 = urem <2 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3280 for instruction: %V4i64 = urem <4 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6560 for instruction: %V8i64 = urem <8 x i64> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %I32 = urem i32 undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2000 for instruction: %V4i32 = urem <4 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4000 for instruction: %V8i32 = urem <8 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8000 for instruction: %V16i32 = urem <16 x i32> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %I16 = urem i16 undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V8i16 = urem <8 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5440 for instruction: %V16i16 = urem <16 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10880 for instruction: %V32i16 = urem <32 x i16> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I8 = urem i8 undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V16i8 = urem <16 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7680 for instruction: %V32i8 = urem <32 x i8> undef, undef +; BTVER2-NEXT: Cost Model: Found an estimated cost of 15360 for instruction: %V64i8 = urem <64 x i8> undef, undef ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = urem i64 undef, undef @@ -156,22 +156,22 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'srem_const' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 
undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i32 = srem <4 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i32 = srem <8 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i32 = srem <16 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i16 = srem <8 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i16 = srem <16 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i16 = srem <32 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = srem <16 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = srem <32 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = srem <64 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %I64 = srem i64 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V2i64 = srem <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3280 for instruction: %V4i64 = srem <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6560 for instruction: %V8i64 = srem 
<8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %I32 = srem i32 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2000 for instruction: %V4i32 = srem <4 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4000 for instruction: %V8i32 = srem <8 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8000 for instruction: %V16i32 = srem <16 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %I16 = srem i16 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V8i16 = srem <8 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5440 for instruction: %V16i16 = srem <16 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10880 for instruction: %V32i16 = srem <32 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I8 = srem i8 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V16i8 = srem <16 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7680 for instruction: %V32i8 = srem <32 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 15360 for instruction: %V64i8 = srem <64 x i8> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, 7 @@ -218,22 +218,22 @@ ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'urem_const' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = urem <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = urem <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = urem <8 x i64> undef, -; BTVER2-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: %I32 = urem i32 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i32 = urem <4 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i32 = urem <8 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i32 = urem <16 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i16 = urem <8 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i16 = urem <16 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i16 = urem <32 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = urem <16 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = urem <32 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = urem <64 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %I64 = urem i64 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V2i64 = urem <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3280 for instruction: %V4i64 = urem <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6560 for instruction: %V8i64 = urem <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %I32 = urem i32 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2000 for instruction: %V4i32 = urem <4 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4000 for instruction: %V8i32 = urem <8 x i32> undef, +; BTVER2-NEXT: Cost Model: 
Found an estimated cost of 8000 for instruction: %V16i32 = urem <16 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %I16 = urem i16 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2720 for instruction: %V8i16 = urem <8 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5440 for instruction: %V16i16 = urem <16 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10880 for instruction: %V32i16 = urem <32 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I8 = urem i8 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V16i8 = urem <16 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7680 for instruction: %V32i8 = urem <32 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 15360 for instruction: %V64i8 = urem <64 x i8> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = urem i64 undef, 7 @@ -432,22 +432,22 @@ ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'srem_uniformconst' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = srem i64 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = srem <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = srem <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = srem <8 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = srem i32 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %I64 = srem i64 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V2i64 = srem <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3280 for instruction: 
%V4i64 = srem <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6560 for instruction: %V8i64 = srem <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %I32 = srem i32 undef, 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = srem <4 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8i32 = srem <8 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V16i32 = srem <16 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = srem i16 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %I16 = srem i16 undef, 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = srem <8 x i16> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = srem <16 x i16> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V32i16 = srem <32 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = srem i8 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = srem <16 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = srem <32 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = srem <64 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I8 = srem i8 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V16i8 = srem <16 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7680 for instruction: %V32i8 = srem <32 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 15360 for instruction: %V64i8 = srem <64 x i8> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 
undef ; %I64 = srem i64 undef, 7 @@ -570,22 +570,22 @@ ; AVX512BW-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'urem_uniformconst' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = urem i64 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V2i64 = urem <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V4i64 = urem <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V8i64 = urem <8 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = urem i32 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %I64 = urem i64 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1640 for instruction: %V2i64 = urem <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3280 for instruction: %V4i64 = urem <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6560 for instruction: %V8i64 = urem <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %I32 = urem i32 undef, 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4i32 = urem <4 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8i32 = urem <8 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V16i32 = urem <16 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I16 = urem i16 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %I16 = urem i16 undef, 7 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8i16 = urem <8 x i16> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V16i16 = urem <16 x i16> undef, ; BTVER2-NEXT: Cost Model: Found an estimated 
cost of 36 for instruction: %V32i16 = urem <32 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I8 = urem i8 undef, 7 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V16i8 = urem <16 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 640 for instruction: %V32i8 = urem <32 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1280 for instruction: %V64i8 = urem <64 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %I8 = urem i8 undef, 7 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3840 for instruction: %V16i8 = urem <16 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 7680 for instruction: %V32i8 = urem <32 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 15360 for instruction: %V64i8 = urem <64 x i8> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = urem i64 undef, 7 @@ -784,22 +784,22 @@ ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'srem_constpow2' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V2i64 = srem <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V4i64 = srem <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V8i64 = srem <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = srem i64 undef, 16 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V2i64 = srem <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V4i64 = srem <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i64 = srem <8 x i64> undef, ; 
BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V4i32 = srem <4 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %V8i32 = srem <8 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 176 for instruction: %V16i32 = srem <16 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4i32 = srem <4 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V8i32 = srem <8 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V16i32 = srem <16 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8i16 = srem <8 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 102 for instruction: %V16i16 = srem <16 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 204 for instruction: %V32i16 = srem <32 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V8i16 = srem <8 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 87 for instruction: %V16i16 = srem <16 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 174 for instruction: %V32i16 = srem <32 x i16> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 74 for instruction: %V16i8 = srem <16 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32i8 = srem <32 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64i8 = srem <64 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 58 for instruction: %V16i8 = srem <16 x 
i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 122 for instruction: %V32i8 = srem <32 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 244 for instruction: %V64i8 = srem <64 x i8> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, 16 @@ -1117,22 +1117,22 @@ ; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; BTVER2-LABEL: 'srem_uniformconstpow2' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I64 = srem i64 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V2i64 = srem <2 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V4i64 = srem <4 x i64> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %V8i64 = srem <8 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %I64 = srem i64 undef, 16 +; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V2i64 = srem <2 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V4i64 = srem <4 x i64> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V8i64 = srem <8 x i64> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I32 = srem i32 undef, 16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4i32 = srem <4 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8i32 = srem <8 x i32> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16i32 = srem <16 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8i32 = srem <8 x i32> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16i32 = srem <16 x i32> undef, ; BTVER2-NEXT: Cost Model: Found an 
estimated cost of 6 for instruction: %I16 = srem i16 undef, 16 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i16 = srem <8 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i16 = srem <16 x i16> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V32i16 = srem <32 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V16i16 = srem <16 x i16> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V32i16 = srem <32 x i16> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %I8 = srem i8 undef, 16 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16i8 = srem <16 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V32i8 = srem <32 x i8> undef, -; BTVER2-NEXT: Cost Model: Found an estimated cost of 120 for instruction: %V64i8 = srem <64 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16i8 = srem <16 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V32i8 = srem <32 x i8> undef, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V64i8 = srem <64 x i8> undef, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; %I64 = srem i64 undef, 16 Index: test/Analysis/CostModel/X86/shuffle-broadcast.ll =================================================================== --- test/Analysis/CostModel/X86/shuffle-broadcast.ll +++ test/Analysis/CostModel/X86/shuffle-broadcast.ll @@ -43,8 +43,8 @@ ; ; BTVER2-LABEL: 'test_vXf64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> 
%src256, <4 x double> undef, <4 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> zeroinitializer @@ -80,8 +80,8 @@ ; ; BTVER2-LABEL: 'test_vXi64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> zeroinitializer @@ -122,8 +122,8 @@ ; BTVER2-LABEL: 'test_vXf32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> undef, <4 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> undef, <8 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> undef, <16 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V64 = shufflevector <2 x float> %src64, <2 x float> undef, <2 x i32> zeroinitializer @@ -165,8 +165,8 @@ ; BTVER2-LABEL: 'test_vXi32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> undef, <4 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> undef, <8 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> undef, <16 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: 
ret void ; %V64 = shufflevector <2 x i32> %src64, <2 x i32> undef, <2 x i32> zeroinitializer @@ -215,8 +215,8 @@ ; ; BTVER2-LABEL: 'test_vXi16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> zeroinitializer @@ -263,7 +263,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXi8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void @@ -310,8 +310,8 @@ ; BTVER2-LABEL: 
'test_upper_vXf32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %a128, <4 x float> %b128, <4 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V256 = shufflevector <8 x float> %a256, <8 x float> %b256, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V512 = shufflevector <16 x float> %a512, <16 x float> %b512, <16 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V64 = shufflevector <2 x float> %a64, <2 x float> %b64, <2 x i32> Index: test/Analysis/CostModel/X86/shuffle-reverse.ll =================================================================== --- test/Analysis/CostModel/X86/shuffle-reverse.ll +++ test/Analysis/CostModel/X86/shuffle-reverse.ll @@ -226,7 +226,7 @@ ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXi16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> ; BTVER2-NEXT: Cost Model: 
Found an estimated cost of 0 for instruction: ret void @@ -287,7 +287,7 @@ ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXi8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void Index: test/Analysis/CostModel/X86/shuffle-single-src.ll =================================================================== --- test/Analysis/CostModel/X86/shuffle-single-src.ll +++ test/Analysis/CostModel/X86/shuffle-single-src.ll @@ -55,9 +55,9 @@ ; ; BTVER2-LABEL: 'test_vXf64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> undef, <8 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x 
double> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> undef, <16 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V128 = shufflevector <2 x double> %src128, <2 x double> undef, <2 x i32> @@ -100,8 +100,8 @@ ; ; BTVER2-LABEL: 'test_vXi64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> undef, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> undef, <8 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V128 = shufflevector <2 x i64> %src128, <2 x i64> undef, <2 x i32> @@ -268,10 +268,10 @@ ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXi16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> undef, <16 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> -; BTVER2-NEXT: Cost Model: Found an 
estimated cost of 180 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> undef, <32 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 216 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> undef, <64 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V128 = shufflevector <8 x i16> %src128, <8 x i16> undef, <8 x i32> @@ -337,9 +337,9 @@ ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXi8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> undef, <32 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> undef, <64 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V128 = shufflevector <16 x i8> %src128, <16 x i8> undef, <16 x i32> Index: test/Analysis/CostModel/X86/shuffle-two-src.ll =================================================================== --- test/Analysis/CostModel/X86/shuffle-two-src.ll +++ test/Analysis/CostModel/X86/shuffle-two-src.ll @@ -48,9 +48,9 @@ ; ; BTVER2-LABEL: 'test_vXf64' ; BTVER2-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <4 x double> %src256, <4 x double> %src256_1, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <8 x double> %src512, <8 x double> %src512_1, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <16 x double> %src1024, <16 x double> %src1024_1, <16 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V128 = shufflevector <2 x double> %src128, <2 x double> %src128_1, <2 x i32> @@ -91,9 +91,9 @@ ; ; BTVER2-LABEL: 'test_vXi64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> %src256_1, <4 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <4 x i64> %src256, <4 x i64> 
%src256_1, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <8 x i64> %src512, <8 x i64> %src512_1, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <16 x i64> %src1024, <16 x i64> %src1024_1, <16 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V128 = shufflevector <2 x i64> %src128, <2 x i64> %src128_1, <2 x i32> @@ -140,7 +140,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXf32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x float> %src128, <4 x float> %src128_1, <4 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x float> %src256, <8 x float> %src256_1, <8 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V512 = shufflevector <16 x float> %src512, <16 x float> %src512_1, <16 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x float> %src1024, <32 x float> %src1024_1, <32 x i32> @@ -190,7 +190,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXi32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V128 = shufflevector <4 x i32> %src128, <4 x i32> %src128_1, <4 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V256 = shufflevector <8 x i32> %src256, <8 x i32> %src256_1, <8 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 
for instruction: %V512 = shufflevector <16 x i32> %src512, <16 x i32> %src512_1, <16 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 112 for instruction: %V1024 = shufflevector <32 x i32> %src1024, <32 x i32> %src1024_1, <32 x i32> @@ -268,10 +268,10 @@ ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXi16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 420 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <16 x i16> %src256, <16 x i16> %src256_1, <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V512 = shufflevector <32 x i16> %src512, <32 x i16> %src512_1, <32 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 504 for instruction: %V1024 = shufflevector <64 x i16> %src1024, <64 x i16> %src1024_1, <64 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V128 = shufflevector <8 x i16> %src128, <8 x i16> %src128_1, <8 x i32> @@ -337,9 +337,9 @@ ; AVX512VBMI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; BTVER2-LABEL: 'test_vXi8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x 
i8> %src128_1, <16 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 90 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V256 = shufflevector <32 x i8> %src256, <32 x i8> %src256_1, <32 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 108 for instruction: %V512 = shufflevector <64 x i8> %src512, <64 x i8> %src512_1, <64 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %V128 = shufflevector <16 x i8> %src128, <16 x i8> %src128_1, <16 x i32> Index: test/Analysis/CostModel/X86/vshift-ashr-cost.ll =================================================================== --- test/Analysis/CostModel/X86/vshift-ashr-cost.ll +++ test/Analysis/CostModel/X86/vshift-ashr-cost.ll @@ -44,7 +44,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; BTVER2-LABEL: 'var_shift_v2i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <2 x i64> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <2 x i64> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; %shift = ashr <2 x i64> %a, %b @@ -73,7 +73,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; BTVER2-LABEL: 'var_shift_v4i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <4 x i64> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <4 x i64> %a, %b ; 
BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = ashr <4 x i64> %a, %b @@ -102,7 +102,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; BTVER2-LABEL: 'var_shift_v8i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = ashr <8 x i64> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = ashr <8 x i64> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = ashr <8 x i64> %a, %b @@ -139,7 +139,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; ; BTVER2-LABEL: 'var_shift_v4i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i32> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i32> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; %shift = ashr <4 x i32> %a, %b @@ -176,7 +176,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'var_shift_v8i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <8 x i32> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <8 x i32> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = ashr <8 x i32> %a, %b @@ -213,7 +213,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'var_shift_v16i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = ashr <16 x i32> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <16 x i32> %a, %b ; BTVER2-NEXT: Cost Model: Found an 
estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = ashr <16 x i32> %a, %b @@ -254,7 +254,7 @@ ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; BTVER2-LABEL: 'var_shift_v8i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <8 x i16> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = ashr <8 x i16> %a, %b @@ -299,7 +299,7 @@ ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'var_shift_v16i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %shift = ashr <16 x i16> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <16 x i16> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = ashr <16 x i16> %a, %b @@ -344,7 +344,7 @@ ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'var_shift_v32i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %shift = ashr <32 x i16> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = ashr <32 x i16> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = ashr <32 x i16> %a, %b @@ -373,7 +373,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; BTVER2-LABEL: 'var_shift_v16i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %shift = ashr <16 x i8> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 
0 for instruction: ret <16 x i8> %shift ; %shift = ashr <16 x i8> %a, %b @@ -406,7 +406,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'var_shift_v32i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %shift = ashr <32 x i8> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %shift = ashr <32 x i8> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = ashr <32 x i8> %a, %b @@ -451,7 +451,7 @@ ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'var_shift_v64i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %shift = ashr <64 x i8> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %shift = ashr <64 x i8> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = ashr <64 x i8> %a, %b @@ -538,7 +538,7 @@ ; ; BTVER2-LABEL: 'splatvar_shift_v4i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = ashr <4 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; @@ -587,7 +587,7 @@ ; ; BTVER2-LABEL: 'splatvar_shift_v8i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = ashr <8 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; @@ -655,7 +655,7 @@ ; ; BTVER2-LABEL: 'splatvar_shift_v8i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <8 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; @@ -704,7 +704,7 @@ ; ; BTVER2-LABEL: 'splatvar_shift_v16i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <16 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; @@ -802,7 +802,7 @@ ; ; BTVER2-LABEL: 'splatvar_shift_v16i16' ; BTVER2-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = ashr <16 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; @@ -875,7 +875,7 @@ ; ; BTVER2-LABEL: 'splatvar_shift_v32i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = ashr <32 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; @@ -918,8 +918,8 @@ ; ; BTVER2-LABEL: 'splatvar_shift_v16i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost 
Model: Found an estimated cost of 21 for instruction: %shift = ashr <16 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %insert = insertelement <16 x i8> undef, i8 %b, i32 0 @@ -974,7 +974,7 @@ ; BTVER2-LABEL: 'splatvar_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %shift = ashr <32 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %shift = ashr <32 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %insert = insertelement <32 x i8> undef, i8 %b, i32 0 @@ -1047,7 +1047,7 @@ ; BTVER2-LABEL: 'splatvar_shift_v64i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %shift = ashr <64 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %shift = ashr <64 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %insert = insertelement <64 x i8> undef, i8 %b, i32 0 @@ -1082,7 +1082,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; BTVER2-LABEL: 'constant_shift_v2i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <2 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for 
instruction: %shift = ashr <2 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; %shift = ashr <2 x i64> %a, @@ -1111,7 +1111,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; BTVER2-LABEL: 'constant_shift_v4i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <4 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = ashr <4 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = ashr <4 x i64> %a, @@ -1140,7 +1140,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; BTVER2-LABEL: 'constant_shift_v8i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = ashr <8 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %shift = ashr <8 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = ashr <8 x i64> %a, @@ -1177,7 +1177,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; ; BTVER2-LABEL: 'constant_shift_v4i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = ashr <4 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = ashr <4 x i32> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; %shift = ashr <4 x i32> %a, @@ -1214,7 +1214,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'constant_shift_v8i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <8 x i32> %a, ; 
BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = ashr <8 x i32> %a, @@ -1251,7 +1251,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'constant_shift_v16i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = ashr <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <16 x i32> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = ashr <16 x i32> %a, @@ -1292,7 +1292,7 @@ ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; BTVER2-LABEL: 'constant_shift_v8i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = ashr <8 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = ashr <8 x i16> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = ashr <8 x i16> %a, @@ -1337,7 +1337,7 @@ ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'constant_shift_v16i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %shift = ashr <16 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = ashr <16 x i16> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = ashr <16 x i16> %a, @@ -1382,7 +1382,7 @@ ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'constant_shift_v32i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %shift = ashr <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = ashr <32 x i16> %a, ; BTVER2-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = ashr <32 x i16> %a, @@ -1411,7 +1411,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; BTVER2-LABEL: 'constant_shift_v16i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = ashr <16 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %shift = ashr <16 x i8> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = ashr <16 x i8> %a, @@ -1444,7 +1444,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'constant_shift_v32i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %shift = ashr <32 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %shift = ashr <32 x i8> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = ashr <32 x i8> %a, @@ -1489,7 +1489,7 @@ ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'constant_shift_v64i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: %shift = ashr <64 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 84 for instruction: %shift = ashr <64 x i8> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = ashr <64 x i8> %a, Index: test/Analysis/CostModel/X86/vshift-lshr-cost.ll =================================================================== --- test/Analysis/CostModel/X86/vshift-lshr-cost.ll +++ test/Analysis/CostModel/X86/vshift-lshr-cost.ll @@ -48,7 +48,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; BTVER2-LABEL: 'var_shift_v2i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost 
of 4 for instruction: %shift = lshr <2 x i64> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; %shift = lshr <2 x i64> %a, %b @@ -81,7 +81,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; BTVER2-LABEL: 'var_shift_v4i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <4 x i64> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = lshr <4 x i64> %a, %b @@ -114,7 +114,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; BTVER2-LABEL: 'var_shift_v8i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <8 x i64> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = lshr <8 x i64> %a, %b @@ -151,7 +151,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; ; BTVER2-LABEL: 'var_shift_v4i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = lshr <4 x i32> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <4 x i32> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; %shift = lshr <4 x i32> %a, %b @@ -188,7 +188,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'var_shift_v8i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = lshr <8 x i32> 
%a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = lshr <8 x i32> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = lshr <8 x i32> %a, %b @@ -225,7 +225,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'var_shift_v16i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = lshr <16 x i32> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <16 x i32> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = lshr <16 x i32> %a, %b @@ -266,7 +266,7 @@ ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; BTVER2-LABEL: 'var_shift_v8i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = lshr <8 x i16> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = lshr <8 x i16> %a, %b @@ -311,7 +311,7 @@ ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'var_shift_v16i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %shift = lshr <16 x i16> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <16 x i16> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = lshr <16 x i16> %a, %b @@ -356,7 +356,7 @@ ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'var_shift_v32i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %shift = lshr <32 x i16> %a, %b +; 
BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = lshr <32 x i16> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = lshr <32 x i16> %a, %b @@ -385,7 +385,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; BTVER2-LABEL: 'var_shift_v16i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i8> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = lshr <16 x i8> %a, %b @@ -418,7 +418,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'var_shift_v32i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <32 x i8> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %shift = lshr <32 x i8> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = lshr <32 x i8> %a, %b @@ -463,7 +463,7 @@ ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'var_shift_v64i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = lshr <64 x i8> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %shift = lshr <64 x i8> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = lshr <64 x i8> %a, %b @@ -532,7 +532,7 @@ ; ; BTVER2-LABEL: 'splatvar_shift_v4i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> 
%insert, <4 x i64> undef, <4 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <4 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; @@ -581,7 +581,7 @@ ; ; BTVER2-LABEL: 'splatvar_shift_v8i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <8 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; @@ -649,7 +649,7 @@ ; ; BTVER2-LABEL: 'splatvar_shift_v8i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <8 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; @@ -698,7 +698,7 @@ ; ; BTVER2-LABEL: 'splatvar_shift_v16i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 
x i32> undef, i32 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <16 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; @@ -796,7 +796,7 @@ ; ; BTVER2-LABEL: 'splatvar_shift_v16i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <16 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; @@ -869,7 +869,7 @@ ; ; BTVER2-LABEL: 'splatvar_shift_v32i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = lshr <32 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; 
@@ -912,8 +912,8 @@ ; ; BTVER2-LABEL: 'splatvar_shift_v16i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %insert = insertelement <16 x i8> undef, i8 %b, i32 0 @@ -968,7 +968,7 @@ ; BTVER2-LABEL: 'splatvar_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <32 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %shift = lshr <32 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %insert = insertelement <32 x i8> undef, i8 %b, i32 0 @@ -1041,7 +1041,7 @@ ; BTVER2-LABEL: 'splatvar_shift_v64i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 
for instruction: %shift = lshr <64 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %shift = lshr <64 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %insert = insertelement <64 x i8> undef, i8 %b, i32 0 @@ -1080,7 +1080,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; BTVER2-LABEL: 'constant_shift_v2i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = lshr <2 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = lshr <2 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; %shift = lshr <2 x i64> %a, @@ -1113,7 +1113,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; BTVER2-LABEL: 'constant_shift_v4i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <4 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = lshr <4 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = lshr <4 x i64> %a, @@ -1146,7 +1146,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; BTVER2-LABEL: 'constant_shift_v8i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = lshr <8 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <8 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = lshr <8 x i64> %a, @@ -1183,7 +1183,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; ; BTVER2-LABEL: 'constant_shift_v4i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for 
instruction: %shift = lshr <4 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = lshr <4 x i32> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; %shift = lshr <4 x i32> %a, @@ -1220,7 +1220,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'constant_shift_v8i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = lshr <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = lshr <8 x i32> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = lshr <8 x i32> %a, @@ -1257,7 +1257,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'constant_shift_v16i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = lshr <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <16 x i32> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = lshr <16 x i32> %a, @@ -1298,7 +1298,7 @@ ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; BTVER2-LABEL: 'constant_shift_v8i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = lshr <8 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = lshr <8 x i16> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = lshr <8 x i16> %a, @@ -1343,7 +1343,7 @@ ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'constant_shift_v16i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %shift = lshr <16 x 
i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <16 x i16> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = lshr <16 x i16> %a, @@ -1388,7 +1388,7 @@ ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'constant_shift_v32i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %shift = lshr <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = lshr <32 x i16> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = lshr <32 x i16> %a, @@ -1417,7 +1417,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; BTVER2-LABEL: 'constant_shift_v16i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = lshr <16 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = lshr <16 x i8> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = lshr <16 x i8> %a, @@ -1450,7 +1450,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'constant_shift_v32i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = lshr <32 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %shift = lshr <32 x i8> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = lshr <32 x i8> %a, @@ -1495,7 +1495,7 @@ ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'constant_shift_v64i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = lshr <64 x i8> %a, +; BTVER2-NEXT: Cost 
Model: Found an estimated cost of 42 for instruction: %shift = lshr <64 x i8> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = lshr <64 x i8> %a, Index: test/Analysis/CostModel/X86/vshift-shl-cost.ll =================================================================== --- test/Analysis/CostModel/X86/vshift-shl-cost.ll +++ test/Analysis/CostModel/X86/vshift-shl-cost.ll @@ -44,7 +44,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; BTVER2-LABEL: 'var_shift_v2i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <2 x i64> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; %shift = shl <2 x i64> %a, %b @@ -77,7 +77,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; BTVER2-LABEL: 'var_shift_v4i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <4 x i64> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = shl <4 x i64> %a, %b @@ -110,7 +110,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; BTVER2-LABEL: 'var_shift_v8i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <8 x i64> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = shl <8 x i64> %a, %b @@ -143,7 +143,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; ; 
BTVER2-LABEL: 'var_shift_v4i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i32> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <4 x i32> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %shift ; %shift = shl <4 x i32> %a, %b @@ -180,7 +180,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'var_shift_v8i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <8 x i32> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i32> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = shl <8 x i32> %a, %b @@ -217,7 +217,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'var_shift_v16i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <16 x i32> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %shift = shl <16 x i32> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = shl <16 x i32> %a, %b @@ -258,7 +258,7 @@ ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; ; BTVER2-LABEL: 'var_shift_v8i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %shift = shl <8 x i16> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %shift = shl <8 x i16> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %shift ; %shift = shl <8 x i16> %a, %b @@ -303,7 +303,7 @@ ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'var_shift_v16i16' -; 
BTVER2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %shift = shl <16 x i16> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %shift = shl <16 x i16> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = shl <16 x i16> %a, %b @@ -348,7 +348,7 @@ ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'var_shift_v32i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %shift = shl <32 x i16> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %shift = shl <32 x i16> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = shl <32 x i16> %a, %b @@ -377,7 +377,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; BTVER2-LABEL: 'var_shift_v16i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = shl <16 x i8> %a, %b @@ -410,7 +410,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'var_shift_v32i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <32 x i8> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i8> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = shl <32 x i8> %a, %b @@ -455,7 +455,7 @@ ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'var_shift_v64i8' -; BTVER2-NEXT: Cost Model: Found an 
estimated cost of 48 for instruction: %shift = shl <64 x i8> %a, %b +; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %shift = shl <64 x i8> %a, %b ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = shl <64 x i8> %a, %b @@ -524,7 +524,7 @@ ; ; BTVER2-LABEL: 'splatvar_shift_v4i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <4 x i64> undef, i64 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <4 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; @@ -573,7 +573,7 @@ ; ; BTVER2-LABEL: 'splatvar_shift_v8i64' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i64> undef, i64 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <8 x i64> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; @@ -641,7 +641,7 @@ ; ; BTVER2-LABEL: 'splatvar_shift_v8i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <8 x i32> undef, i32 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = 
shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; @@ -690,7 +690,7 @@ ; ; BTVER2-LABEL: 'splatvar_shift_v16i32' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i32> undef, i32 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; @@ -788,7 +788,7 @@ ; ; BTVER2-LABEL: 'splatvar_shift_v16i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i16> undef, i16 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; @@ -861,7 +861,7 @@ ; ; BTVER2-LABEL: 'splatvar_shift_v32i16' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %insert = insertelement <32 x i16> undef, i16 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer ; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; @@ -904,8 +904,8 @@ ; ; BTVER2-LABEL: 'splatvar_shift_v16i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <16 x i8> undef, i8 %b, i32 0 -; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %insert = insertelement <16 x i8> undef, i8 %b, i32 0 @@ -960,7 +960,7 @@ ; BTVER2-LABEL: 'splatvar_shift_v32i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <32 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <32 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for 
instruction: %shift = shl <32 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %insert = insertelement <32 x i8> undef, i8 %b, i32 0 @@ -1033,7 +1033,7 @@ ; BTVER2-LABEL: 'splatvar_shift_v64i8' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %insert = insertelement <64 x i8> undef, i8 %b, i32 0 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = shl <64 x i8> %a, %splat +; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %shift = shl <64 x i8> %a, %splat ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %insert = insertelement <64 x i8> undef, i8 %b, i32 0 @@ -1068,7 +1068,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; ; BTVER2-LABEL: 'constant_shift_v2i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <2 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shift = shl <2 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shift ; %shift = shl <2 x i64> %a, @@ -1101,7 +1101,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; ; BTVER2-LABEL: 'constant_shift_v4i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <4 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <4 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shift ; %shift = shl <4 x i64> %a, @@ -1134,7 +1134,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; ; 
BTVER2-LABEL: 'constant_shift_v8i64' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <8 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shift = shl <8 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shift ; %shift = shl <8 x i64> %a, @@ -1204,7 +1204,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; ; BTVER2-LABEL: 'constant_shift_v8i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shift = shl <8 x i32> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shift ; %shift = shl <8 x i32> %a, @@ -1241,7 +1241,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; ; BTVER2-LABEL: 'constant_shift_v16i32' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i32> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shift ; %shift = shl <16 x i32> %a, @@ -1287,7 +1287,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; ; BTVER2-LABEL: 'constant_shift_v16i16' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shift = shl <16 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shift = shl <16 x i16> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shift ; %shift = shl <16 x i16> %a, @@ -1332,7 +1332,7 @@ ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; ; BTVER2-LABEL: 'constant_shift_v32i16' -; 
BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shift = shl <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shift = shl <32 x i16> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shift ; %shift = shl <32 x i16> %a, @@ -1361,7 +1361,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; ; BTVER2-LABEL: 'constant_shift_v16i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %shift = shl <16 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shift = shl <16 x i8> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %shift ; %shift = shl <16 x i8> %a, @@ -1394,7 +1394,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; ; BTVER2-LABEL: 'constant_shift_v32i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %shift = shl <32 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shift = shl <32 x i8> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i8> %shift ; %shift = shl <32 x i8> %a, @@ -1439,7 +1439,7 @@ ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; ; BTVER2-LABEL: 'constant_shift_v64i8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %shift = shl <64 x i8> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %shift = shl <64 x i8> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <64 x i8> %shift ; %shift = shl <64 x i8> %a, @@ -1933,7 +1933,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shl ; ; BTVER2-LABEL: 'test5' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: 
%shl = shl <2 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %shl = shl <2 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %shl ; %shl = shl <2 x i64> %a, @@ -1978,7 +1978,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shl ; ; BTVER2-LABEL: 'test6' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shl = shl <16 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %shl = shl <16 x i16> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i16> %shl ; %shl = shl <16 x i16> %a, @@ -2019,7 +2019,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shl ; ; BTVER2-LABEL: 'test7' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %shl = shl <8 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %shl = shl <8 x i32> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i32> %shl ; %shl = shl <8 x i32> %a, @@ -2057,7 +2057,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shl ; ; BTVER2-LABEL: 'test8' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shl = shl <4 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shl = shl <4 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i64> %shl ; %shl = shl <4 x i64> %a, @@ -2104,7 +2104,7 @@ ; AVX512BWVL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <32 x i16> %shl ; ; BTVER2-LABEL: 'test9' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shl = shl <32 x i16> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %shl = shl <32 x i16> %a, ; BTVER2-NEXT: Cost Model: Found 
an estimated cost of 0 for instruction: ret <32 x i16> %shl ; %shl = shl <32 x i16> %a, @@ -2143,7 +2143,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shl ; ; BTVER2-LABEL: 'test10' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %shl = shl <16 x i32> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %shl = shl <16 x i32> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i32> %shl ; %shl = shl <16 x i32> %a, @@ -2181,7 +2181,7 @@ ; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shl ; ; BTVER2-LABEL: 'test11' -; BTVER2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %shl = shl <8 x i64> %a, +; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %shl = shl <8 x i64> %a, ; BTVER2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i64> %shl ; %shl = shl <8 x i64> %a, Index: utils/TableGen/SubtargetEmitter.cpp =================================================================== --- utils/TableGen/SubtargetEmitter.cpp +++ utils/TableGen/SubtargetEmitter.cpp @@ -1498,7 +1498,7 @@ if (NumNonTruePreds) { bool FirstNonTruePredicate = true; - SS << "if ("; + SS << "if (MI && ("; PE.setIndentLevel(PE.getIndentLevel() + 2); @@ -1527,7 +1527,7 @@ << ((NumNonTruePreds > 1) ? ")" : ""); } - SS << ")\n"; // end of if-stmt + SS << "))\n"; // end of if-stmt PE.decreaseIndentLevel(); SS.indent(PE.getIndentLevel() * 2); PE.decreaseIndentLevel();