Index: lib/Target/X86/X86InstrInfo.td =================================================================== --- lib/Target/X86/X86InstrInfo.td +++ lib/Target/X86/X86InstrInfo.td @@ -1749,7 +1749,7 @@ // Bit tests instructions: BT, BTS, BTR, BTC. let Defs = [EFLAGS] in { -let SchedRW = [WriteALU] in { +let SchedRW = [WriteBitTest] in { def BT16rr : I<0xA3, MRMDestReg, (outs), (ins GR16:$src1, GR16:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR16:$src1, GR16:$src2))]>, @@ -1782,7 +1782,7 @@ []>, TB, NotMemoryFoldable; } -let SchedRW = [WriteALU] in { +let SchedRW = [WriteBitTest] in { def BT16ri8 : Ii8<0xBA, MRM4r, (outs), (ins GR16:$src1, i16i8imm:$src2), "bt{w}\t{$src2, $src1|$src1, $src2}", [(set EFLAGS, (X86bt GR16:$src1, i16immSExt8:$src2))]>, @@ -1817,7 +1817,7 @@ } // SchedRW let hasSideEffects = 0 in { -let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { +let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in { def BTC16rr : I<0xBB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB, NotMemoryFoldable; @@ -1841,7 +1841,7 @@ NotMemoryFoldable; } -let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { +let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in { def BTC16ri8 : Ii8<0xBA, MRM7r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "btc{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB; def BTC32ri8 : Ii8<0xBA, MRM7r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), @@ -1860,7 +1860,7 @@ Requires<[In64BitMode]>; } -let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { +let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in { def BTR16rr : I<0xB3, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB, NotMemoryFoldable; @@ -1884,7 +1884,7 @@ NotMemoryFoldable; } -let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { +let SchedRW = [WriteBitTest], 
Constraints = "$src1 = $dst" in { def BTR16ri8 : Ii8<0xBA, MRM6r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "btr{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB; @@ -1907,7 +1907,7 @@ Requires<[In64BitMode]>; } -let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { +let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in { def BTS16rr : I<0xAB, MRMDestReg, (outs GR16:$dst), (ins GR16:$src1, GR16:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB, NotMemoryFoldable; @@ -1931,7 +1931,7 @@ NotMemoryFoldable; } -let SchedRW = [WriteALU], Constraints = "$src1 = $dst" in { +let SchedRW = [WriteBitTest], Constraints = "$src1 = $dst" in { def BTS16ri8 : Ii8<0xBA, MRM5r, (outs GR16:$dst), (ins GR16:$src1, i16i8imm:$src2), "bts{w}\t{$src2, $src1|$src1, $src2}", []>, OpSize16, TB; def BTS32ri8 : Ii8<0xBA, MRM5r, (outs GR32:$dst), (ins GR32:$src1, i32i8imm:$src2), Index: lib/Target/X86/X86SchedBroadwell.td =================================================================== --- lib/Target/X86/X86SchedBroadwell.td +++ lib/Target/X86/X86SchedBroadwell.td @@ -110,6 +110,7 @@ defm : BWWriteResPair; // Integer ALU + flags op. defm : BWWriteResPair; // Integer multiplication. defm : BWWriteResPair; // Integer 64-bit multiplication. 
+def : WriteRes; // defm : BWWriteResPair; defm : BWWriteResPair; @@ -598,14 +599,6 @@ let ResourceCycles = [1]; } def: InstRW<[BWWriteResGroup6], (instrs CDQ, CQO)>; -def: InstRW<[BWWriteResGroup6], (instregex "BT(16|32|64)ri8", - "BT(16|32|64)rr", - "BTC(16|32|64)ri8", - "BTC(16|32|64)rr", - "BTR(16|32|64)ri8", - "BTR(16|32|64)rr", - "BTS(16|32|64)ri8", - "BTS(16|32|64)rr")>; def BWWriteResGroup7 : SchedWriteRes<[BWPort15]> { let Latency = 1; Index: lib/Target/X86/X86SchedHaswell.td =================================================================== --- lib/Target/X86/X86SchedHaswell.td +++ lib/Target/X86/X86SchedHaswell.td @@ -119,6 +119,7 @@ def : WriteRes; defm : HWWriteResPair; +def : WriteRes; defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; @@ -882,14 +883,6 @@ let ResourceCycles = [1]; } def: InstRW<[HWWriteResGroup7], (instrs CDQ, CQO)>; -def: InstRW<[HWWriteResGroup7], (instregex "BT(16|32|64)ri8", - "BT(16|32|64)rr", - "BTC(16|32|64)ri8", - "BTC(16|32|64)rr", - "BTR(16|32|64)ri8", - "BTR(16|32|64)rr", - "BTS(16|32|64)ri8", - "BTS(16|32|64)rr")>; def HWWriteResGroup8 : SchedWriteRes<[HWPort15]> { let Latency = 1; Index: lib/Target/X86/X86SchedSandyBridge.td =================================================================== --- lib/Target/X86/X86SchedSandyBridge.td +++ lib/Target/X86/X86SchedSandyBridge.td @@ -107,6 +107,7 @@ def : WriteRes; defm : SBWriteResPair; +def : WriteRes; defm : SBWriteResPair; defm : SBWriteResPair; defm : SBWriteResPair; @@ -561,14 +562,6 @@ let ResourceCycles = [1]; } def: InstRW<[SBWriteResGroup4], (instrs CDQ, CQO)>; -def: InstRW<[SBWriteResGroup4], (instregex "BT(16|32|64)ri8", - "BT(16|32|64)rr", - "BTC(16|32|64)ri8", - "BTC(16|32|64)rr", - "BTR(16|32|64)ri8", - "BTR(16|32|64)rr", - "BTS(16|32|64)ri8", - "BTS(16|32|64)rr")>; def SBWriteResGroup5 : SchedWriteRes<[SBPort15]> { let Latency = 1; Index: lib/Target/X86/X86SchedSkylakeClient.td 
=================================================================== --- lib/Target/X86/X86SchedSkylakeClient.td +++ lib/Target/X86/X86SchedSkylakeClient.td @@ -109,6 +109,7 @@ defm : SKLWriteResPair; // Integer ALU + flags op. defm : SKLWriteResPair; // Integer multiplication. defm : SKLWriteResPair; // Integer 64-bit multiplication. +def : WriteRes; // defm : SKLWriteResPair; defm : SKLWriteResPair; @@ -599,14 +600,6 @@ let ResourceCycles = [1]; } def: InstRW<[SKLWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>; -def: InstRW<[SKLWriteResGroup7], (instregex "BT(16|32|64)ri8", - "BT(16|32|64)rr", - "BTC(16|32|64)ri8", - "BTC(16|32|64)rr", - "BTR(16|32|64)ri8", - "BTR(16|32|64)rr", - "BTS(16|32|64)ri8", - "BTS(16|32|64)rr")>; def SKLWriteResGroup8 : SchedWriteRes<[SKLPort15]> { let Latency = 1; Index: lib/Target/X86/X86SchedSkylakeServer.td =================================================================== --- lib/Target/X86/X86SchedSkylakeServer.td +++ lib/Target/X86/X86SchedSkylakeServer.td @@ -109,6 +109,7 @@ defm : SKXWriteResPair; // Integer ALU + flags op. defm : SKXWriteResPair; // Integer multiplication. defm : SKXWriteResPair; // Integer 64-bit multiplication. +def : WriteRes; // defm : SKXWriteResPair; defm : SKXWriteResPair; @@ -612,14 +613,6 @@ let ResourceCycles = [1]; } def: InstRW<[SKXWriteResGroup7], (instrs CDQ, CQO, CLAC, STAC)>; -def: InstRW<[SKXWriteResGroup7], (instregex "BT(16|32|64)ri8", - "BT(16|32|64)rr", - "BTC(16|32|64)ri8", - "BTC(16|32|64)rr", - "BTR(16|32|64)ri8", - "BTR(16|32|64)rr", - "BTS(16|32|64)ri8", - "BTS(16|32|64)rr")>; def SKXWriteResGroup8 : SchedWriteRes<[SKXPort15]> { let Latency = 1; Index: lib/Target/X86/X86Schedule.td =================================================================== --- lib/Target/X86/X86Schedule.td +++ lib/Target/X86/X86Schedule.td @@ -118,6 +118,9 @@ def WriteIMulH : SchedWrite; // Integer multiplication, high part. def WriteLEA : SchedWrite; // LEA instructions can't fold loads. 
+// Bit Test +def WriteBitTest : SchedWrite; // Bit Test - TODO add memory folding support + // Integer division. defm WriteDiv8 : X86SchedWritePair; defm WriteDiv16 : X86SchedWritePair; Index: lib/Target/X86/X86ScheduleAtom.td =================================================================== --- lib/Target/X86/X86ScheduleAtom.td +++ lib/Target/X86/X86ScheduleAtom.td @@ -80,6 +80,7 @@ defm : AtomWriteResPair; defm : AtomWriteResPair; defm : AtomWriteResPair; +def : WriteRes; defm : AtomWriteResPair; defm : AtomWriteResPair; Index: lib/Target/X86/X86ScheduleBtVer2.td =================================================================== --- lib/Target/X86/X86ScheduleBtVer2.td +++ lib/Target/X86/X86ScheduleBtVer2.td @@ -168,6 +168,8 @@ defm : JWriteResIntPair; // i64 multiplication defm : X86WriteRes; +def : WriteRes; + defm : JWriteResIntPair; defm : JWriteResIntPair; defm : JWriteResIntPair; Index: lib/Target/X86/X86ScheduleSLM.td =================================================================== --- lib/Target/X86/X86ScheduleSLM.td +++ lib/Target/X86/X86ScheduleSLM.td @@ -94,6 +94,7 @@ def : InstRW<[WriteMove], (instrs COPY)>; defm : SLMWriteResPair; +def : WriteRes; defm : SLMWriteResPair; defm : SLMWriteResPair; defm : SLMWriteResPair; Index: lib/Target/X86/X86ScheduleZnver1.td =================================================================== --- lib/Target/X86/X86ScheduleZnver1.td +++ lib/Target/X86/X86ScheduleZnver1.td @@ -177,6 +177,7 @@ def : WriteRes; defm : ZnWriteResPair; defm : ZnWriteResPair; +def : WriteRes; defm : ZnWriteResPair; defm : ZnWriteResPair; defm : ZnWriteResPair; Index: utils/TableGen/CodeGenSchedule.h =================================================================== --- utils/TableGen/CodeGenSchedule.h +++ utils/TableGen/CodeGenSchedule.h @@ -443,6 +443,8 @@ void collectSchedClasses(); + void checkSchedClasses(); + void collectRetireControlUnits(); void collectRegisterFiles(); Index: utils/TableGen/CodeGenSchedule.cpp 
===================================================================
--- utils/TableGen/CodeGenSchedule.cpp
+++ utils/TableGen/CodeGenSchedule.cpp
@@ -21,6 +21,7 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Regex.h"
 #include "llvm/Support/raw_ostream.h"
@@ -33,6 +34,16 @@
 
 #define DEBUG_TYPE "subtarget-emitter"
 
+#ifdef EXPENSIVE_CHECKS
+// FIXME: TableGen is failed iff EXPENSIVE_CHECKS defined
+static constexpr bool OptCheckSchedClasses = true;
+#else
+// FIXME: the default value should be false
+static cl::opt<bool> OptCheckSchedClasses(
+    "check-sched-class-table", cl::init(true), cl::Hidden,
+    cl::desc("Check sched class table on different types of inconsistencies"));
+#endif
+
 #ifndef NDEBUG
 static void dumpIdxVec(ArrayRef<unsigned> V) {
   for (unsigned Idx : V)
@@ -223,6 +234,7 @@
   collectOptionalProcessorInfo();
 
   checkCompleteness();
+  checkSchedClasses();
 }
 
 void CodeGenSchedModels::collectRetireControlUnits() {
@@ -699,6 +711,99 @@
   }
 }
 
+/// Emit TableGen warnings about questionable scheduling overrides.
+///
+/// For every instruction that has a SchedRW-based class with InstRW overrides:
+///  * warn when more than half of the processor models override it, since the
+///    default SchedRW then looks like a poor fit;
+///  * warn when an override has the same Latency and NumMicroOps as the write
+///    it replaces, i.e. the override looks redundant.
+/// Does nothing unless OptCheckSchedClasses is set.
+void CodeGenSchedModels::checkSchedClasses() {
+  if (!OptCheckSchedClasses)
+    return;
+
+  std::string str;
+  raw_string_ostream OS(str);
+  // True if record \p Def has an initialized field called \p Name.
+  auto HasField = [](Record *Def, StringRef Name) {
+    const RecordVal *R = Def->getValue(Name);
+    return R && R->getValue();
+  };
+
+  // Check each instruction for each model to see if it is overridden too
+  // often. If so, it is a candidate for a more fine-grained SchedClass.
+  for (const CodeGenInstruction *Inst : Target.getInstructionsByEnumValue()) {
+    StringRef InstName = Inst->TheDef->getName();
+    unsigned SCIdx = getSchedClassIdx(*Inst);
+    if (!SCIdx)
+      continue;
+    CodeGenSchedClass &SC = getSchedClass(SCIdx);
+    if (SC.Writes.empty())
+      continue;
+    const RecVec &RWDefs = SchedClasses[SCIdx].InstRWs;
+    if (RWDefs.empty())
+      continue;
+    // FIXME: what should be threshold here?
+    if (RWDefs.size() > (ProcModels.size() / 2)) {
+      // FIXME: this dump hangs the execution !!!
+      // SC.dump(&Target.getSchedModels());
+      OS << "SchedRW machine model for inst '" << InstName << "' (";
+      for (auto I : SC.Writes)
+        OS << " " << SchedWrites[I].Name;
+      for (auto I : SC.Reads)
+        OS << " " << SchedReads[I].Name;
+      OS << " ) should be updated/improved because it's overridden "
+         << RWDefs.size() << " times out of " << ProcModels.size()
+         << " models:\n\t";
+      for (Record *RWDef : RWDefs)
+        OS << " " << getProcModel(RWDef->getValueAsDef("SchedModel")).ModelName;
+      PrintWarning(OS.str());
+      str.clear();
+    }
+
+    // TODO: here we should check latency/uop in SC vs. RWDef. Maybe we
+    // should do it iff RWDefs.size() == 1 only.
+    // If latency/uop are the same, warn about the unnecessary redefinition.
+    for (Record *RWDef : RWDefs) {
+      IdxVec Writes;
+      IdxVec Reads;
+      findRWs(RWDef->getValueAsListOfDefs("OperandReadWrites"), Writes, Reads);
+      if ((Writes.size() != SC.Writes.size()) ||
+          (Reads.size() != SC.Reads.size()))
+        continue;
+
+      // TODO: do we need sorting Write & Reads?
+      for (unsigned I = 0, S = SC.Writes.size(); I < S; I++) {
+        Record *SCDef = SchedWrites[SC.Writes[I]].TheDef;
+        Record *Def = SchedWrites[Writes[I]].TheDef;
+        if (!SCDef || !Def)
+          continue;
+        // FIXME: We should deal with default Latency here.
+        // getValueAsInt() is fatal on a missing field, so check both records.
+        if (!HasField(SCDef, "Latency") || !HasField(Def, "Latency") ||
+            !HasField(SCDef, "NumMicroOps") || !HasField(Def, "NumMicroOps"))
+          continue;
+        auto SCLat = SCDef->getValueAsInt("Latency");
+        auto SCuOp = SCDef->getValueAsInt("NumMicroOps");
+        auto Lat = Def->getValueAsInt("Latency");
+        auto uOp = Def->getValueAsInt("NumMicroOps");
+        if ((SCLat == Lat) && (SCuOp == uOp))
+          OS << "Overridden version of inst '" << InstName
+             << "' has the same latency & uOp values as the original one "
+                "for model '"
+             << getProcModel(RWDef->getValueAsDef("SchedModel")).ModelName
+             << "'\n";
+      }
+      // str() flushes first; the stream may still be buffering the text.
+      if (!OS.str().empty()) {
+        PrintWarning(OS.str());
+        str.clear();
+      }
+    }
+  }
+}
+
 // Get the SchedClass index for an instruction.
unsigned CodeGenSchedModels::getSchedClassIdx(const CodeGenInstruction &Inst) const {