Index: include/llvm/Target/TargetSchedule.td
===================================================================
--- include/llvm/Target/TargetSchedule.td
+++ include/llvm/Target/TargetSchedule.td
@@ -281,10 +281,9 @@
 // ProcResources indicates the set of resources consumed by the write.
 // Optionally, ResourceCycles indicates the number of cycles the
 // resource is consumed. Each ResourceCycles item is paired with the
-// ProcResource item at the same position in its list. Since
-// ResourceCycles are rarely specialized, the list may be
-// incomplete. By default, resources are consumed for a single cycle,
-// regardless of latency, which models a fully pipelined processing
+// ProcResource item at the same position in its list. ResourceCycles
+// can be `[]`: in that case, all resources are consumed for a single
+// cycle, regardless of latency, which models a fully pipelined processing
 // unit. A value of 0 for ResourceCycles means that the resource must
 // be available but is not consumed, which is only relevant for
 // unbuffered resources.
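Note: the comment change above tightens the contract: ResourceCycles must now either be empty or match ProcResources entry for entry. A minimal sketch of the two accepted forms, using hypothetical resource and write names (the enclosing SchedMachineModel plumbing is elided):

  // Hypothetical resources; only the shapes below matter.
  def HypoUnitA : ProcResource<1>;
  def HypoUnitB : ProcResource<1>;

  // ResourceCycles left empty: every resource is held for one cycle,
  // which models a fully pipelined unit.
  def HypoWriteP : SchedWriteRes<[HypoUnitA, HypoUnitB]> {
    let Latency = 4;
  }

  // ResourceCycles given explicitly: one entry per ProcResource,
  // in the same order.
  def HypoWriteQ : SchedWriteRes<[HypoUnitA, HypoUnitB]> {
    let Latency = 4;
    let ResourceCycles = [2, 1]; // HypoUnitA busy 2 cycles, HypoUnitB 1.
  }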
Index: lib/Target/AArch64/AArch64SchedExynosM1.td
===================================================================
--- lib/Target/AArch64/AArch64SchedExynosM1.td
+++ lib/Target/AArch64/AArch64SchedExynosM1.td
@@ -107,7 +107,7 @@
 def M1WriteLD   : SchedWriteRes<[M1UnitL,
                                  M1UnitA]>    { let Latency     = 6;
                                                 let NumMicroOps = 2;
-                                                let ResourceCycles = [2]; }
+                                                let ResourceCycles = [2, 1]; }
 def M1WriteLH   : SchedWriteRes<[]>           { let Latency     = 5;
                                                 let NumMicroOps = 0; }
 def M1WriteLX   : SchedWriteVariant<[SchedVar,
@@ -319,19 +319,19 @@
 def M1WriteVLDD : SchedWriteRes<[M1UnitL,
                                  M1UnitNALU]> { let Latency     = 7;
                                                 let NumMicroOps = 2;
-                                                let ResourceCycles = [2]; }
+                                                let ResourceCycles = [2, 1]; }
 def M1WriteVLDE : SchedWriteRes<[M1UnitL,
                                  M1UnitNALU]> { let Latency     = 6;
                                                 let NumMicroOps = 2; }
 def M1WriteVLDF : SchedWriteRes<[M1UnitL,
                                  M1UnitL]>    { let Latency     = 10;
                                                 let NumMicroOps = 2;
-                                                let ResourceCycles = [5]; }
+                                                let ResourceCycles = [5, 1]; }
 def M1WriteVLDG : SchedWriteRes<[M1UnitL,
                                  M1UnitNALU,
                                  M1UnitNALU]> { let Latency     = 7;
                                                 let NumMicroOps = 3;
-                                                let ResourceCycles = [2]; }
+                                                let ResourceCycles = [2, 1, 1]; }
 def M1WriteVLDH : SchedWriteRes<[M1UnitL,
                                  M1UnitNALU,
                                  M1UnitNALU]> { let Latency     = 6;
@@ -340,27 +340,27 @@
                                  M1UnitL,
                                  M1UnitL]>    { let Latency     = 12;
                                                 let NumMicroOps = 3;
-                                                let ResourceCycles = [6]; }
+                                                let ResourceCycles = [6, 1, 1]; }
 def M1WriteVLDJ : SchedWriteRes<[M1UnitL,
                                  M1UnitNALU,
                                  M1UnitNALU,
                                  M1UnitNALU]> { let Latency     = 9;
                                                 let NumMicroOps = 4;
-                                                let ResourceCycles = [4]; }
+                                                let ResourceCycles = [4, 1, 1, 1]; }
 def M1WriteVLDK : SchedWriteRes<[M1UnitL,
                                  M1UnitNALU,
                                  M1UnitNALU,
                                  M1UnitNALU,
                                  M1UnitNALU]> { let Latency     = 9;
                                                 let NumMicroOps = 5;
-                                                let ResourceCycles = [4]; }
+                                                let ResourceCycles = [4, 1, 1, 1, 1]; }
 def M1WriteVLDL : SchedWriteRes<[M1UnitL,
                                  M1UnitNALU,
                                  M1UnitNALU,
                                  M1UnitL,
                                  M1UnitNALU]> { let Latency     = 7;
                                                 let NumMicroOps = 5;
-                                                let ResourceCycles = [2]; }
+                                                let ResourceCycles = [2, 1, 1, 1, 1]; }
 def M1WriteVLDM : SchedWriteRes<[M1UnitL,
                                  M1UnitNALU,
                                  M1UnitNALU,
@@ -368,13 +368,13 @@
                                  M1UnitNALU,
                                  M1UnitNALU]> { let Latency     = 7;
                                                 let NumMicroOps = 6;
-                                                let ResourceCycles = [2]; }
+                                                let ResourceCycles = [2, 1, 1, 1, 1, 1]; }
 def M1WriteVLDN : SchedWriteRes<[M1UnitL,
                                  M1UnitL,
                                  M1UnitL,
                                  M1UnitL]>    { let Latency     = 14;
                                                 let NumMicroOps = 4;
-                                                let ResourceCycles = [7]; }
+                                                let ResourceCycles = [7, 1, 1, 1]; }
 def M1WriteVSTA : WriteSequence<[WriteVST], 2>;
 def M1WriteVSTB : WriteSequence<[WriteVST], 3>;
 def M1WriteVSTC : WriteSequence<[WriteVST], 4>;
@@ -382,14 +382,14 @@
                                  M1UnitFST,
                                  M1UnitFST]>  { let Latency     = 7;
                                                 let NumMicroOps = 2;
-                                                let ResourceCycles = [7]; }
+                                                let ResourceCycles = [7, 1, 1]; }
 def M1WriteVSTE : SchedWriteRes<[M1UnitS,
                                  M1UnitFST,
                                  M1UnitS,
                                  M1UnitFST,
                                  M1UnitFST]>  { let Latency     = 8;
                                                 let NumMicroOps = 3;
-                                                let ResourceCycles = [8]; }
+                                                let ResourceCycles = [8, 1, 1, 1, 1]; }
 def M1WriteVSTF : SchedWriteRes<[M1UnitNALU,
                                  M1UnitS,
                                  M1UnitFST,
@@ -398,7 +398,7 @@
                                  M1UnitFST,
                                  M1UnitFST]>  { let Latency     = 15;
                                                 let NumMicroOps = 5;
-                                                let ResourceCycles = [15]; }
+                                                let ResourceCycles = [15, 1, 1, 1, 1, 1, 1]; }
 def M1WriteVSTG : SchedWriteRes<[M1UnitNALU,
                                  M1UnitS,
                                  M1UnitFST,
@@ -409,14 +409,14 @@
                                  M1UnitFST,
                                  M1UnitFST]>  { let Latency     = 16;
                                                 let NumMicroOps = 6;
-                                                let ResourceCycles = [16]; }
+                                                let ResourceCycles = [16, 1, 1, 1, 1, 1, 1, 1, 1]; }
 def M1WriteVSTH : SchedWriteRes<[M1UnitNALU,
                                  M1UnitS,
                                  M1UnitFST,
                                  M1UnitFST,
                                  M1UnitFST]>  { let Latency     = 14;
                                                 let NumMicroOps = 4;
-                                                let ResourceCycles = [14]; }
+                                                let ResourceCycles = [14, 1, 1, 1, 1]; }
 def M1WriteVSTI : SchedWriteRes<[M1UnitNALU,
                                  M1UnitS,
                                  M1UnitFST,
@@ -429,7 +429,7 @@
                                  M1UnitFST,
                                  M1UnitFST]>  { let Latency     = 17;
                                                 let NumMicroOps = 7;
-                                                let ResourceCycles = [17]; }
+                                                let ResourceCycles = [17, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]; }
 
 // Branch instructions
 def : InstRW<[M1WriteB1], (instrs Bcc)>;
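Note: the ExynosM1 updates are mechanical: each ResourceCycles list is extended with an explicit 1 for every resource the old implicit default covered, so the model itself is unchanged. For instance, M1WriteLD names two resources, and its old `[2]` already meant "M1UnitL for 2 cycles, M1UnitA for 1"; the patched def simply spells that out:

  def M1WriteLD : SchedWriteRes<[M1UnitL,
                                 M1UnitA]> { let Latency     = 6;
                                             let NumMicroOps = 2;
                                             let ResourceCycles = [2, 1]; }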
Index: lib/Target/AArch64/AArch64SchedExynosM3.td
===================================================================
--- lib/Target/AArch64/AArch64SchedExynosM3.td
+++ lib/Target/AArch64/AArch64SchedExynosM3.td
@@ -301,22 +301,22 @@
 def M3WriteNEONI : SchedWriteRes<[M3UnitNSHF,
                                   M3UnitS]>     { let Latency     = 5;
                                                   let NumMicroOps = 2; }
-def M3WriteNEONV : SchedWriteRes<[M3UnitFDIV,
-                                  M3UnitFDIV]>  { let Latency     = 7;
-                                                  let NumMicroOps = 1;
-                                                  let ResourceCycles = [8]; }
-def M3WriteNEONW : SchedWriteRes<[M3UnitFDIV,
-                                  M3UnitFDIV]>  { let Latency     = 12;
-                                                  let NumMicroOps = 1;
-                                                  let ResourceCycles = [13]; }
+def M3WriteNEONV : SchedWriteRes<[M3UnitFDIV0,
+                                  M3UnitFDIV1]> { let Latency     = 7;
+                                                  let NumMicroOps = 2;
+                                                  let ResourceCycles = [8, 8]; }
+def M3WriteNEONW : SchedWriteRes<[M3UnitFDIV0,
+                                  M3UnitFDIV1]> { let Latency     = 12;
+                                                  let NumMicroOps = 2;
+                                                  let ResourceCycles = [13, 13]; }
 def M3WriteNEONX : SchedWriteRes<[M3UnitFSQR,
                                   M3UnitFSQR]>  { let Latency     = 18;
-                                                  let NumMicroOps = 1;
-                                                  let ResourceCycles = [19]; }
+                                                  let NumMicroOps = 2;
+                                                  let ResourceCycles = [19, 19]; }
 def M3WriteNEONY : SchedWriteRes<[M3UnitFSQR,
                                   M3UnitFSQR]>  { let Latency     = 25;
-                                                  let NumMicroOps = 1;
-                                                  let ResourceCycles = [26]; }
+                                                  let NumMicroOps = 2;
+                                                  let ResourceCycles = [26, 26]; }
 def M3WriteNEONZ : SchedWriteRes<[M3UnitNMSC,
                                   M3UnitNMSC]>  { let Latency     = 5;
                                                   let NumMicroOps = 2; }
@@ -365,50 +365,50 @@
 def M3WriteVLDD : SchedWriteRes<[M3UnitL,
                                  M3UnitNALU]> { let Latency     = 7;
                                                 let NumMicroOps = 2;
-                                                let ResourceCycles = [2]; }
+                                                let ResourceCycles = [2, 1]; }
 def M3WriteVLDE : SchedWriteRes<[M3UnitL,
                                  M3UnitNALU]> { let Latency     = 6;
                                                 let NumMicroOps = 2;
-                                                let ResourceCycles = [2]; }
+                                                let ResourceCycles = [2, 1]; }
 def M3WriteVLDF : SchedWriteRes<[M3UnitL,
                                  M3UnitL]>    { let Latency     = 10;
                                                 let NumMicroOps = 2;
-                                                let ResourceCycles = [5]; }
+                                                let ResourceCycles = [5, 1]; }
 def M3WriteVLDG : SchedWriteRes<[M3UnitL,
                                  M3UnitNALU,
                                  M3UnitNALU]> { let Latency     = 7;
                                                 let NumMicroOps = 3;
-                                                let ResourceCycles = [2]; }
+                                                let ResourceCycles = [2, 1, 1]; }
 def M3WriteVLDH : SchedWriteRes<[M3UnitL,
                                  M3UnitNALU,
                                  M3UnitNALU]> { let Latency     = 6;
                                                 let NumMicroOps = 3;
-                                                let ResourceCycles = [2]; }
+                                                let ResourceCycles = [2, 1, 1]; }
 def M3WriteVLDI : SchedWriteRes<[M3UnitL,
                                  M3UnitL,
                                  M3UnitL]>    { let Latency     = 12;
                                                 let NumMicroOps = 3;
-                                                let ResourceCycles = [6]; }
+                                                let ResourceCycles = [6, 6, 6]; }
 def M3WriteVLDJ : SchedWriteRes<[M3UnitL,
                                  M3UnitNALU,
                                  M3UnitNALU,
                                  M3UnitNALU]> { let Latency     = 7;
                                                 let NumMicroOps = 4;
-                                                let ResourceCycles = [2]; }
+                                                let ResourceCycles = [2, 1, 1, 1]; }
 def M3WriteVLDK : SchedWriteRes<[M3UnitL,
                                  M3UnitNALU,
                                  M3UnitNALU,
                                  M3UnitNALU,
                                  M3UnitNALU]> { let Latency     = 9;
                                                 let NumMicroOps = 5;
-                                                let ResourceCycles = [4]; }
+                                                let ResourceCycles = [4, 1, 1, 1, 1]; }
 def M3WriteVLDL : SchedWriteRes<[M3UnitL,
                                  M3UnitNALU,
                                  M3UnitNALU,
                                  M3UnitL,
                                  M3UnitNALU]> { let Latency     = 6;
                                                 let NumMicroOps = 5;
-                                                let ResourceCycles = [3]; }
+                                                let ResourceCycles = [6, 1, 1, 6, 1]; }
 def M3WriteVLDM : SchedWriteRes<[M3UnitL,
                                  M3UnitNALU,
                                  M3UnitNALU,
@@ -416,13 +416,13 @@
                                  M3UnitNALU,
                                  M3UnitNALU]> { let Latency     = 7;
                                                 let NumMicroOps = 6;
-                                                let ResourceCycles = [3]; }
+                                                let ResourceCycles = [6, 1, 1, 6, 1, 1]; }
 def M3WriteVLDN : SchedWriteRes<[M3UnitL,
                                  M3UnitL,
                                  M3UnitL,
                                  M3UnitL]>    { let Latency     = 14;
                                                 let NumMicroOps = 4;
-                                                let ResourceCycles = [7]; }
+                                                let ResourceCycles = [6, 6, 6, 6]; }
 def M3WriteVSTA : WriteSequence<[WriteVST], 2>;
 def M3WriteVSTB : WriteSequence<[WriteVST], 3>;
 def M3WriteVSTC : WriteSequence<[WriteVST], 4>;
@@ -430,16 +430,16 @@
                                  M3UnitFST,
                                  M3UnitS,
                                  M3UnitFST]>  { let Latency     = 7;
-                                                let NumMicroOps = 2;
-                                                let ResourceCycles = [7]; }
+                                                let NumMicroOps = 4;
+                                                let ResourceCycles = [1, 3, 1, 3]; }
 def M3WriteVSTE : SchedWriteRes<[M3UnitS,
                                  M3UnitFST,
                                  M3UnitS,
                                  M3UnitFST,
                                  M3UnitS,
                                  M3UnitFST]>  { let Latency     = 8;
-                                                let NumMicroOps = 3;
-                                                let ResourceCycles = [8]; }
+                                                let NumMicroOps = 6;
+                                                let ResourceCycles = [1, 3, 1, 3, 1, 3]; }
 def M3WriteVSTF : SchedWriteRes<[M3UnitNALU,
                                  M3UnitFST,
                                  M3UnitFST,
@@ -447,8 +447,8 @@
                                  M3UnitFST,
                                  M3UnitS,
                                  M3UnitFST]>  { let Latency     = 15;
-                                                let NumMicroOps = 5;
-                                                let ResourceCycles = [15]; }
+                                                let NumMicroOps = 7;
+                                                let ResourceCycles = [1, 3, 3, 1, 3, 1, 3]; }
 def M3WriteVSTG : SchedWriteRes<[M3UnitNALU,
                                  M3UnitFST,
                                  M3UnitFST,
@@ -458,15 +458,15 @@
                                  M3UnitFST,
                                  M3UnitS,
                                  M3UnitFST]>  { let Latency     = 16;
-                                                let NumMicroOps = 6;
-                                                let ResourceCycles = [16]; }
+                                                let NumMicroOps = 9;
+                                                let ResourceCycles = [1, 3, 3, 1, 3, 1, 3, 1, 3]; }
 def M3WriteVSTH : SchedWriteRes<[M3UnitNALU,
                                  M3UnitFST,
                                  M3UnitFST,
                                  M3UnitS,
                                  M3UnitFST]>  { let Latency     = 14;
-                                                let NumMicroOps = 4;
-                                                let ResourceCycles = [14]; }
+                                                let NumMicroOps = 5;
+                                                let ResourceCycles = [1, 3, 3, 1, 3]; }
 def M3WriteVSTI : SchedWriteRes<[M3UnitNALU,
                                  M3UnitFST,
                                  M3UnitFST,
@@ -476,8 +476,8 @@
                                  M3UnitFST,
                                  M3UnitS,
                                  M3UnitFST]>  { let Latency     = 17;
-                                                let NumMicroOps = 7;
-                                                let ResourceCycles = [17]; }
+                                                let NumMicroOps = 9;
+                                                let ResourceCycles = [1, 3, 3, 1, 3, 1, 3, 1, 3]; }
 
 // Special cases.
 def M3WriteAES  : SchedWriteRes<[M3UnitNCRY]> { let Latency = 1; }
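Note: unlike the M1 updates, several ExynosM3 changes are semantic: the FP divider is split into the M3UnitFDIV0/M3UnitFDIV1 pipes, NumMicroOps is raised to match the micro-ops actually issued, and occupancy is now stated per resource (e.g. `[1, 3, 1, 3]` instead of a single `[7]`). A minimal sketch of per-resource occupancy, with hypothetical unit names:

  // One store micro-op on each pipe: the address pipe is busy for
  // 1 cycle, the FP store-data pipe for 3.
  def HypoWriteVST : SchedWriteRes<[HypoUnitS, HypoUnitFST]> {
    let Latency        = 7;
    let NumMicroOps    = 2;
    let ResourceCycles = [1, 3];
  }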
Index: lib/Target/AArch64/AArch64SchedThunderX2T99.td
===================================================================
--- lib/Target/AArch64/AArch64SchedThunderX2T99.td
+++ lib/Target/AArch64/AArch64SchedThunderX2T99.td
@@ -416,7 +416,7 @@
 // Address generation
 def : WriteRes {
   let Latency = 1;
-  let ResourceCycles = [1, 3];
+  let ResourceCycles = [1];
   let NumMicroOps = 2;
 }
 
@@ -438,7 +438,7 @@
 // ALU, extend and/or shift
 def : WriteRes {
   let Latency = 2;
-  let ResourceCycles = [2, 3];
+  let ResourceCycles = [2];
   let NumMicroOps = 2;
 }
 
@@ -457,7 +457,7 @@
 
 def : WriteRes {
   let Latency = 1;
-  let ResourceCycles = [1, 3];
+  let ResourceCycles = [1];
   let NumMicroOps = 2;
 }
@@ -500,14 +500,14 @@
 // Latency range of 13-23/13-39.
 def : WriteRes {
   let Latency = 39;
-  let ResourceCycles = [13, 39];
+  let ResourceCycles = [39];
   let NumMicroOps = 4;
 }
 
 // Divide, X-form
 def : WriteRes {
   let Latency = 23;
-  let ResourceCycles = [13, 23];
+  let ResourceCycles = [23];
   let NumMicroOps = 4;
 }
@@ -1252,7 +1252,7 @@
 def : WriteRes {
   let Latency = 7;
   let NumMicroOps = 4;
-  let ResourceCycles = [4, 23];
+  let ResourceCycles = [4];
 }
 
 // ASIMD arith, reduce, 4H/4S
Index: utils/TableGen/SubtargetEmitter.cpp
===================================================================
--- utils/TableGen/SubtargetEmitter.cpp
+++ utils/TableGen/SubtargetEmitter.cpp
@@ -936,8 +936,7 @@
 void SubtargetEmitter::ExpandProcResources(RecVec &PRVec,
                                            std::vector<int64_t> &Cycles,
                                            const CodeGenProcModel &PM) {
-  // Default to 1 resource cycle.
-  Cycles.resize(PRVec.size(), 1);
+  assert(PRVec.size() == Cycles.size() && "failed precondition");
   for (unsigned i = 0, e = PRVec.size(); i != e; ++i) {
     Record *PRDef = PRVec[i];
     RecVec SubResources;
@@ -1105,6 +1104,21 @@
   std::vector<int64_t> Cycles =
       WriteRes->getValueAsListOfInts("ResourceCycles");
 
+  if (Cycles.empty()) {
+    // If ResourceCycles is not provided, default to one cycle per
+    // resource.
+    Cycles.resize(PRVec.size(), 1);
+  } else if (Cycles.size() != PRVec.size()) {
+    // If ResourceCycles is provided, check consistency.
+    PrintFatalError(
+        WriteRes->getLoc(),
+        Twine("Inconsistent resource cycles: !size(ResourceCycles) != "
+              "!size(ProcResources): ")
+            .concat(Twine(PRVec.size()))
+            .concat(" vs ")
+            .concat(Twine(Cycles.size())));
+  }
+
   ExpandProcResources(PRVec, Cycles, ProcModel);
 
   for (unsigned PRIdx = 0, PREnd = PRVec.size();
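Note: with the SubtargetEmitter change, a mismatched list is now a hard TableGen error instead of being silently padded with ones, so stale shorthands like the old ExynosM1 `[2]` cannot linger. A hypothetical def that the new check rejects (names are illustrative only):

  def HypoUnitA : ProcResource<1>;
  def HypoUnitB : ProcResource<1>;

  // Two resources but three cycle counts: tblgen now fails with
  // "Inconsistent resource cycles: !size(ResourceCycles) !=
  //  !size(ProcResources): 2 vs 3" at this def's location.
  def HypoWriteBad : SchedWriteRes<[HypoUnitA, HypoUnitB]> {
    let ResourceCycles = [1, 2, 3];
  }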