diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td --- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td +++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td @@ -8,6 +8,131 @@ //===----------------------------------------------------------------------===// +/// c is true if mx has the worst case behavior compared to LMULs in MxList. +/// On the SiFive7, the worst case LMUL is the Largest LMUL +/// and the worst case sew is the smallest SEW for that LMUL. +class SiFive7IsWorstCaseMX MxList> { + defvar LLMUL = LargestLMUL.r; + bit c = !eq(mx, LLMUL); +} + +/// c is true if mx and sew have the worst case behavior compared to LMULs in +/// MxList. On the SiFive7, the worst case LMUL is the Largest LMUL +/// and the worst case sew is the smallest SEW for that LMUL. +class SiFive7IsWorstCaseMXSEW MxList, + bit isF = 0> { + defvar LLMUL = LargestLMUL.r; + defvar SSEW = SmallestSEW.r; + bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW)); +} + +class SiFive7GetCyclesDefault { + int c = !cond( + !eq(mx, "M1") : 2, + !eq(mx, "M2") : 4, + !eq(mx, "M4") : 8, + !eq(mx, "M8") : 16, + !eq(mx, "MF2") : 1, + !eq(mx, "MF4") : 1, + !eq(mx, "MF8") : 1 + ); +} + +class SiFive7GetCyclesWidening { + int c = !cond( + !eq(mx, "M1") : 2, + !eq(mx, "M2") : 4, + !eq(mx, "M4") : 8, + !eq(mx, "MF2") : 1, + !eq(mx, "MF4") : 1, + !eq(mx, "MF8") : 1 + ); +} + +class SiFive7GetCyclesNarrowing { + int c = !cond( + !eq(mx, "M1") : 4, + !eq(mx, "M2") : 8, + !eq(mx, "M4") : 16, + !eq(mx, "MF2") : 2, + !eq(mx, "MF4") : 1, + !eq(mx, "MF8") : 1 + ); +} + +class SiFive7GetCyclesOutputLMUL { + int c = !cond( + !eq(mx, "M1") : 1, + !eq(mx, "M2") : 2, + !eq(mx, "M4") : 4, + !eq(mx, "M8") : 8, + !eq(mx, "MF2") : 1, + !eq(mx, "MF4") : 1, + !eq(mx, "MF8") : 1 + ); +} + +class SiFive7GetCyclesVMask { + int c = !cond( + !eq(mx, "M1") : 1, + !eq(mx, "M2") : 1, + !eq(mx, "M4") : 1, + !eq(mx, "M8") : 2, + !eq(mx, "MF2") : 1, + !eq(mx, "MF4") : 1, + !eq(mx, "MF8") : 1 + ); +} + +// 
Cycles for segmented loads and stores are calculated using the +// formula ceil(2 * nf * lmul). +class SiFive7GetCyclesSegmented { + int c = !cond( + !eq(mx, "M1") : !mul(!mul(2, nf), 1), + !eq(mx, "M2") : !mul(!mul(2, nf), 2), + !eq(mx, "M4") : !mul(!mul(2, nf), 4), + !eq(mx, "M8") : !mul(!mul(2, nf), 8), + // We can calculate ceil(a/b) using (a + b - 1) / b. + // Since the multiplication of fractional lmul is the + // same as division by the denominator the formula we + // use is ceil(2 * nf / lmul_denominator). We can use + // ceil(a/b) where a = 2 * nf, b = lmul_denominator. + !eq(mx, "MF2") : !div(!sub(!add(!mul(2, nf), 2), 1), 2), + !eq(mx, "MF4") : !div(!sub(!add(!mul(2, nf), 4), 1), 4), + !eq(mx, "MF8") : !div(!sub(!add(!mul(2, nf), 8), 1), 8) + ); +} + +class SiFive7GetCyclesOnePerElement { + // FIXME: On SiFive7, VLEN is 512. Although a user can request the compiler + // to use a different VLEN, this model will not make scheduling decisions + // based on the user specified VLEN. + // c = ceil(VLEN / SEW) * LMUL + // Note: c >= 1 since the smallest VLUpperBound is 512 / 64 = 8, and the + // largest division performed on VLUpperBound is in MF8 case with division + // by 8. Therefore, there is no need to ceil the result. + int VLUpperBound = !div(512, sew); + int c = !cond( + !eq(mx, "M1") : VLUpperBound, + !eq(mx, "M2") : !mul(VLUpperBound, 2), + !eq(mx, "M4") : !mul(VLUpperBound, 4), + !eq(mx, "M8") : !mul(VLUpperBound, 8), + !eq(mx, "MF2") : !div(VLUpperBound, 2), + !eq(mx, "MF4") : !div(VLUpperBound, 4), + !eq(mx, "MF8") : !div(VLUpperBound, 8) + ); +} + +class SiFive7GetDivOrSqrtFactor { + int c = !cond( + // TODO: Add SchedSEWSetFP upstream and remove the SEW=8 case. + !eq(sew, 8) : 15, + !eq(sew, 16) : 15, + !eq(sew, 32) : 28, + !eq(sew, 64) : 57 + ); +} + // SiFive7 machine model for scheduling and other instruction cost heuristics. def SiFive7Model : SchedMachineModel { let MicroOpBufferSize = 0; // Explicitly set to zero since SiFive7 is in-order. 
@@ -18,22 +143,27 @@ let UnsupportedFeatures = [HasStdExtZbkb, HasStdExtZbkc, HasStdExtZbkx, HasStdExtZcmt, HasStdExtZknd, HasStdExtZkne, HasStdExtZknh, HasStdExtZksed, HasStdExtZksh, - HasStdExtZkr, HasVInstructions]; + HasStdExtZkr]; } -// The SiFive7 microarchitecture has two pipelines: A and B. +// The SiFive7 microarchitecture has three pipelines: A, B, V. // Pipe A can handle memory, integer alu and vector operations. // Pipe B can handle integer alu, control flow, integer multiply and divide, // and floating point computation. +// Pipe V can handle the V extension. let SchedModel = SiFive7Model in { let BufferSize = 0 in { def SiFive7PipeA : ProcResource<1>; def SiFive7PipeB : ProcResource<1>; +def SiFive7PipeV : ProcResource<1>; } let BufferSize = 1 in { def SiFive7IDiv : ProcResource<1> { let Super = SiFive7PipeB; } // Int Division def SiFive7FDiv : ProcResource<1> { let Super = SiFive7PipeB; } // FP Division/Sqrt +def SiFive7VA : ProcResource<1> { let Super = SiFive7PipeV; } // Arithmetic sequencer +def SiFive7VL : ProcResource<1> { let Super = SiFive7PipeV; } // Load sequencer +def SiFive7VS : ProcResource<1> { let Super = SiFive7PipeV; } // Store sequencer } def SiFive7PipeAB : ProcResGroup<[SiFive7PipeA, SiFive7PipeB]>; @@ -221,9 +351,346 @@ def : WriteRes; } +// 6. Configuration-Setting Instructions +let Latency = 3 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +// 7. 
Vector Loads and Stores +foreach mx = SchedMxList in { + defvar Cycles = SiFive7GetCyclesDefault.c; + defvar IsWorstCase = SiFive7IsWorstCaseMX.c; + let Latency = Cycles, ResourceCycles = [Cycles] in { + defm "" : LMULWriteResMX<"WriteVLDE", [SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTE", [SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDM", [SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTM", [SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDS8", [SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDS16", [SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDS32", [SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDS64", [SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTS8", [SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTS16", [SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTS32", [SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTS64", [SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDUX8", [SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDUX16", [SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDUX32", [SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDUX64", [SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDOX8", [SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDOX16", [SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDOX32", [SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDOX64", [SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTUX8", [SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTUX16", [SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTUX32", [SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTUX64", [SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTOX8", 
[SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTOX16", [SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTOX32", [SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSTOX64", [SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLDFF", [SiFive7VL], mx, IsWorstCase>; + } +} + +// VLD*R is LMUL aware +let Latency = 4, ResourceCycles = [2] in + def : WriteRes; +let Latency = 4, ResourceCycles = [4] in + def : WriteRes; +let Latency = 4, ResourceCycles = [8] in + def : WriteRes; +let Latency = 4, ResourceCycles = [16] in + def : WriteRes; +// VST*R is LMUL aware +let Latency = 1, ResourceCycles = [2] in + def : WriteRes; +let Latency = 1, ResourceCycles = [4] in + def : WriteRes; +let Latency = 1, ResourceCycles = [8] in + def : WriteRes; +let Latency = 1, ResourceCycles = [16] in + def : WriteRes; + +foreach mx = SchedMxList in { + foreach nf=2-8 in { + foreach eew = [8, 16, 32, 64] in { + defvar Cycles = SiFive7GetCyclesSegmented.c; + defvar IsWorstCase = SiFive7IsWorstCaseMX.c; + let Latency = Cycles, ResourceCycles = [Cycles] in { + defm "" : LMULWriteResMX<"WriteVLSEG" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLSEGFF" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLSSEG" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLUXSEG" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVLOXSEG" # nf # "e" # eew, [SiFive7VL], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSSEG" # nf # "e" # eew, [SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSSSEG" # nf # "e" # eew, [SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSUXSEG" # nf # "e" # eew, [SiFive7VS], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSOXSEG" # nf # "e" # eew, [SiFive7VS], mx, IsWorstCase>; + } + } + } +} + +// 11. 
Vector Integer Arithmetic Instructions +foreach mx = SchedMxList in { + defvar Cycles = SiFive7GetCyclesDefault.c; + defvar IsWorstCase = SiFive7IsWorstCaseMX.c; + let Latency = 4, ResourceCycles = [Cycles] in { + defm "" : LMULWriteResMX<"WriteVIALUV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIALUX", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIALUI", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUX", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICALUI", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVShiftV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVShiftX", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVShiftI", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICmpV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICmpX", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVICmpI", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMulV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMulX", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMulAddV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMulAddX", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMergeV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMergeX", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMergeI", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMovV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMovX", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMovI", [SiFive7VA], mx, IsWorstCase>; + } +} +foreach mx = SchedMxList in { + defvar Cycles = SiFive7GetCyclesOutputLMUL.c; + defvar IsWorstCase = SiFive7IsWorstCaseMX.c; + let Latency = 4, 
ResourceCycles = [Cycles] in { + defm "" : LMULWriteResMX<"WriteVExtV", [SiFive7VA], mx, IsWorstCase>; + } +} +foreach mx = SchedMxList in { + foreach sew = SchedSEWSet.val in { + defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor.c, + !div(SiFive7GetCyclesOnePerElement.c, 4)); + defvar IsWorstCase = SiFive7IsWorstCaseMXSEW.c; + let Latency = Cycles, ResourceCycles = [Cycles] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SiFive7VA], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SiFive7VA], mx, sew, IsWorstCase>; + } + } +} + +// Widening +foreach mx = SchedMxListW in { + defvar Cycles = SiFive7GetCyclesWidening.c; + defvar IsWorstCase = SiFive7IsWorstCaseMX.c; + let Latency = 8, ResourceCycles = [Cycles] in { + defm "" : LMULWriteResMX<"WriteVIWALUV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWALUX", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWALUI", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWMulV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWMulX", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWMulAddV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIWMulAddX", [SiFive7VA], mx, IsWorstCase>; + } +} +// Narrowing +foreach mx = SchedMxListW in { + defvar Cycles = SiFive7GetCyclesNarrowing.c; + defvar IsWorstCase = SiFive7IsWorstCaseMX.c; + let Latency = 8, ResourceCycles = [Cycles] in { + defm "" : LMULWriteResMX<"WriteVNShiftV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNShiftX", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNShiftI", [SiFive7VA], mx, IsWorstCase>; + } +} + +// 12. 
Vector Fixed-Point Arithmetic Instructions +foreach mx = SchedMxList in { + defvar Cycles = SiFive7GetCyclesDefault.c; + defvar IsWorstCase = SiFive7IsWorstCaseMX.c; + let Latency = 8, ResourceCycles = [Cycles] in { + defm "" : LMULWriteResMX<"WriteVSALUV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSALUX", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSALUI", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVAALUV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVAALUX", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSMulV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSMulX", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSShiftV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSShiftX", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVSShiftI", [SiFive7VA], mx, IsWorstCase>; + } +} +// Narrowing +foreach mx = SchedMxListW in { + defvar Cycles = SiFive7GetCyclesNarrowing.c; + defvar IsWorstCase = SiFive7IsWorstCaseMX.c; + let Latency = 8, ResourceCycles = [Cycles] in { + defm "" : LMULWriteResMX<"WriteVNClipV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNClipX", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVNClipI", [SiFive7VA], mx, IsWorstCase>; + } +} + +// 13. 
Vector Floating-Point Instructions +foreach mx = SchedMxList in { + defvar Cycles = SiFive7GetCyclesDefault.c; + defvar IsWorstCase = SiFive7IsWorstCaseMX.c; + let Latency = 8, ResourceCycles = [Cycles] in { + defm "" : LMULWriteResMX<"WriteVFALUV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFALUF", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFMulV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFMulF", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFMulAddV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFMulAddF", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFRecpV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFCmpV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFCmpF", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFSgnjV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFSgnjF", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFClassV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFMergeV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFMovV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFCvtIToFV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFCvtFToIV", [SiFive7VA], mx, IsWorstCase>; + } +} +foreach mx = SchedMxListF in { + foreach sew = SchedSEWSetF.val in { + defvar Cycles = !mul(SiFive7GetDivOrSqrtFactor.c, + !div(SiFive7GetCyclesOnePerElement.c, 4)); + defvar IsWorstCase = SiFive7IsWorstCaseMXSEW.c; + let Latency = Cycles, ResourceCycles = [Cycles] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVFSqrtV", [SiFive7VA], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFDivV", [SiFive7VA], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVFDivF", [SiFive7VA], mx, sew, IsWorstCase>; + } + } +} + +// Widening +foreach mx = SchedMxListW in { + defvar 
Cycles = SiFive7GetCyclesWidening.c; + defvar IsWorstCase = SiFive7IsWorstCaseMX.c; + let Latency = 8, ResourceCycles = [Cycles] in { + defm "" : LMULWriteResMX<"WriteVFWCvtIToFV", [SiFive7VA], mx, IsWorstCase>; + } +} +foreach mx = SchedMxListFW in { + defvar Cycles = SiFive7GetCyclesWidening.c; + defvar IsWorstCase = SiFive7IsWorstCaseMX.c; + let Latency = 8, ResourceCycles = [Cycles] in { + defm "" : LMULWriteResMX<"WriteVFWALUV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFWMulV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFWMulAddV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFWCvtFToIV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFWCvtFToFV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFWMulAddF", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFWMulF", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFWALUF", [SiFive7VA], mx, IsWorstCase>; + } +} +// Narrowing +foreach mx = SchedMxListW in { + defvar Cycles = SiFive7GetCyclesNarrowing.c; + defvar IsWorstCase = SiFive7IsWorstCaseMX.c; + let Latency = 8, ResourceCycles = [Cycles] in { + defm "" : LMULWriteResMX<"WriteVFNCvtFToIV", [SiFive7VA], mx, IsWorstCase>; + } +} +foreach mx = SchedMxListFW in { + defvar Cycles = SiFive7GetCyclesNarrowing.c; + defvar IsWorstCase = SiFive7IsWorstCaseMX.c; + let Latency = 8, ResourceCycles = [Cycles] in { + defm "" : LMULWriteResMX<"WriteVFNCvtIToFV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFNCvtFToFV", [SiFive7VA], mx, IsWorstCase>; + } +} + +// 14. 
Vector Reduction Operations +let Latency = 32 in { +defm "" : LMULWriteRes<"WriteVIRedV_From", [SiFive7VA]>; +defm "" : LMULWriteRes<"WriteVIWRedV_From", [SiFive7VA]>; +defm "" : LMULWriteRes<"WriteVFRedV_From", [SiFive7VA]>; +defm "" : LMULWriteRes<"WriteVFRedOV_From", [SiFive7VA]>; +defm "" : LMULWriteResFWRed<"WriteVFWRedV_From", [SiFive7VA]>; +defm "" : LMULWriteResFWRed<"WriteVFWRedOV_From", [SiFive7VA]>; +} + +// 15. Vector Mask Instructions +foreach mx = SchedMxList in { + defvar Cycles = SiFive7GetCyclesVMask.c; + defvar IsWorstCase = SiFive7IsWorstCaseMX.c; + let Latency = 4, ResourceCycles = [Cycles] in { + defm "" : LMULWriteResMX<"WriteVMALUV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVMPopV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVMFFSV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVMSFSV", [SiFive7VA], mx, IsWorstCase>; + } +} +foreach mx = SchedMxList in { + defvar Cycles = SiFive7GetCyclesDefault.c; + defvar IsWorstCase = SiFive7IsWorstCaseMX.c; + let Latency = 4, ResourceCycles = [Cycles] in { + defm "" : LMULWriteResMX<"WriteVMIotV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVMIdxV", [SiFive7VA], mx, IsWorstCase>; + } +} + +// 16. 
Vector Permutation Instructions +foreach mx = SchedMxList in { + defvar Cycles = SiFive7GetCyclesDefault.c; + defvar IsWorstCase = SiFive7IsWorstCaseMX.c; + let Latency = 8, ResourceCycles = [Cycles] in { + defm "" : LMULWriteResMX<"WriteVIMovVX", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVIMovXV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFMovVF", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFMovFV", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVRGatherVX", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVRGatherVI", [SiFive7VA], mx, IsWorstCase>; + } +} + +foreach mx = SchedMxList in { + foreach sew = SchedSEWSet.val in { + defvar Cycles = SiFive7GetCyclesOnePerElement.c; + defvar IsWorstCase = SiFive7IsWorstCaseMXSEW.c; + let Latency = !add(Cycles, 3), ResourceCycles = [Cycles] in { + defm "" : LMULSEWWriteResMXSEW<"WriteVRGatherVV", [SiFive7VA], mx, sew, IsWorstCase>; + defm "" : LMULSEWWriteResMXSEW<"WriteVCompressV", [SiFive7VA], mx, sew, IsWorstCase>; + } + } +} + +foreach mx = SchedMxList in { + defvar Cycles = SiFive7GetCyclesDefault.c; + defvar IsWorstCase = SiFive7IsWorstCaseMX.c; + let Latency = 4, ResourceCycles = [Cycles] in { + defm "" : LMULWriteResMX<"WriteVISlideX", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVISlideI", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVISlide1X", [SiFive7VA], mx, IsWorstCase>; + defm "" : LMULWriteResMX<"WriteVFSlide1F", [SiFive7VA], mx, IsWorstCase>; + } +} + +// VMov*V is LMUL Aware +let Latency = 4, ResourceCycles = [2] in + def : WriteRes; +let Latency = 4, ResourceCycles = [4] in + def : WriteRes; +let Latency = 4, ResourceCycles = [8] in + def : WriteRes; +let Latency = 4, ResourceCycles = [16] in + def : WriteRes; + // Others def : WriteRes; def : WriteRes; +let Latency = 3 in + def : WriteRes; def : InstRW<[WriteIALU], (instrs COPY)>; @@ -324,9 +791,171 @@ def : 
ReadAdvance; def : ReadAdvance; +// 6. Configuration-Setting Instructions +def : ReadAdvance; +def : ReadAdvance; + +// 7. Vector Loads and Stores +def : ReadAdvance; +def : ReadAdvance; +defm "" : LMULReadAdvance<"ReadVSTEV", 0>; +defm "" : LMULReadAdvance<"ReadVSTM", 0>; +def : ReadAdvance; +def : ReadAdvance; +defm "" : LMULReadAdvance<"ReadVSTS8V", 0>; +defm "" : LMULReadAdvance<"ReadVSTS16V", 0>; +defm "" : LMULReadAdvance<"ReadVSTS32V", 0>; +defm "" : LMULReadAdvance<"ReadVSTS64V", 0>; +defm "" : LMULReadAdvance<"ReadVLDUXV", 0>; +defm "" : LMULReadAdvance<"ReadVLDOXV", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX8", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX16", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX32", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX64", 0>; +defm "" : LMULReadAdvance<"ReadVSTUXV", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX8V", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX16V", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX32V", 0>; +defm "" : LMULReadAdvance<"ReadVSTUX64V", 0>; +defm "" : LMULReadAdvance<"ReadVSTOX8", 0>; +defm "" : LMULReadAdvance<"ReadVSTOX16", 0>; +defm "" : LMULReadAdvance<"ReadVSTOX32", 0>; +defm "" : LMULReadAdvance<"ReadVSTOX64", 0>; +defm "" : LMULReadAdvance<"ReadVSTOXV", 0>; +defm "" : LMULReadAdvance<"ReadVSTOX8V", 0>; +defm "" : LMULReadAdvance<"ReadVSTOX16V", 0>; +defm "" : LMULReadAdvance<"ReadVSTOX32V", 0>; +defm "" : LMULReadAdvance<"ReadVSTOX64V", 0>; +// LMUL Aware +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// 11. 
Vector Integer Arithmetic Instructions +defm : LMULReadAdvance<"ReadVIALUV", 0>; +defm : LMULReadAdvance<"ReadVIALUX", 0>; +defm : LMULReadAdvanceW<"ReadVIWALUV", 0>; +defm : LMULReadAdvanceW<"ReadVIWALUX", 0>; +defm : LMULReadAdvance<"ReadVExtV", 0>; +defm : LMULReadAdvance<"ReadVICALUV", 0>; +defm : LMULReadAdvance<"ReadVICALUX", 0>; +defm : LMULReadAdvance<"ReadVShiftV", 0>; +defm : LMULReadAdvance<"ReadVShiftX", 0>; +defm : LMULReadAdvanceW<"ReadVNShiftV", 0>; +defm : LMULReadAdvanceW<"ReadVNShiftX", 0>; +defm : LMULReadAdvance<"ReadVICmpV", 0>; +defm : LMULReadAdvance<"ReadVICmpX", 0>; +defm : LMULReadAdvance<"ReadVIMulV", 0>; +defm : LMULReadAdvance<"ReadVIMulX", 0>; +defm : LMULSEWReadAdvance<"ReadVIDivV", 0>; +defm : LMULSEWReadAdvance<"ReadVIDivX", 0>; +defm : LMULReadAdvanceW<"ReadVIWMulV", 0>; +defm : LMULReadAdvanceW<"ReadVIWMulX", 0>; +defm : LMULReadAdvance<"ReadVIMulAddV", 0>; +defm : LMULReadAdvance<"ReadVIMulAddX", 0>; +defm : LMULReadAdvanceW<"ReadVIWMulAddV", 0>; +defm : LMULReadAdvanceW<"ReadVIWMulAddX", 0>; +defm : LMULReadAdvance<"ReadVIMergeV", 0>; +defm : LMULReadAdvance<"ReadVIMergeX", 0>; +defm : LMULReadAdvance<"ReadVIMovV", 0>; +defm : LMULReadAdvance<"ReadVIMovX", 0>; + +// 12. Vector Fixed-Point Arithmetic Instructions +defm "" : LMULReadAdvance<"ReadVSALUV", 0>; +defm "" : LMULReadAdvance<"ReadVSALUX", 0>; +defm "" : LMULReadAdvance<"ReadVAALUV", 0>; +defm "" : LMULReadAdvance<"ReadVAALUX", 0>; +defm "" : LMULReadAdvance<"ReadVSMulV", 0>; +defm "" : LMULReadAdvance<"ReadVSMulX", 0>; +defm "" : LMULReadAdvance<"ReadVSShiftV", 0>; +defm "" : LMULReadAdvance<"ReadVSShiftX", 0>; +defm "" : LMULReadAdvanceW<"ReadVNClipV", 0>; +defm "" : LMULReadAdvanceW<"ReadVNClipX", 0>; + +// 13. 
Vector Floating-Point Instructions +defm "" : LMULReadAdvance<"ReadVFALUV", 0>; +defm "" : LMULReadAdvance<"ReadVFALUF", 0>; +defm "" : LMULReadAdvanceFW<"ReadVFWALUV", 0>; +defm "" : LMULReadAdvanceFW<"ReadVFWALUF", 0>; +defm "" : LMULReadAdvance<"ReadVFMulV", 0>; +defm "" : LMULReadAdvance<"ReadVFMulF", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFDivV", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFDivF", 0>; +defm "" : LMULReadAdvanceFW<"ReadVFWMulV", 0>; +defm "" : LMULReadAdvanceFW<"ReadVFWMulF", 0>; +defm "" : LMULReadAdvance<"ReadVFMulAddV", 0>; +defm "" : LMULReadAdvance<"ReadVFMulAddF", 0>; +defm "" : LMULReadAdvanceFW<"ReadVFWMulAddV", 0>; +defm "" : LMULReadAdvanceFW<"ReadVFWMulAddF", 0>; +defm "" : LMULSEWReadAdvanceF<"ReadVFSqrtV", 0>; +defm "" : LMULReadAdvance<"ReadVFRecpV", 0>; +defm "" : LMULReadAdvance<"ReadVFCmpV", 0>; +defm "" : LMULReadAdvance<"ReadVFCmpF", 0>; +defm "" : LMULReadAdvance<"ReadVFSgnjV", 0>; +defm "" : LMULReadAdvance<"ReadVFSgnjF", 0>; +defm "" : LMULReadAdvance<"ReadVFClassV", 0>; +defm "" : LMULReadAdvance<"ReadVFMergeV", 0>; +defm "" : LMULReadAdvance<"ReadVFMergeF", 0>; +defm "" : LMULReadAdvance<"ReadVFMovF", 0>; +defm "" : LMULReadAdvance<"ReadVFCvtIToFV", 0>; +defm "" : LMULReadAdvance<"ReadVFCvtFToIV", 0>; +defm "" : LMULReadAdvanceW<"ReadVFWCvtIToFV", 0>; +defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToIV", 0>; +defm "" : LMULReadAdvanceFW<"ReadVFWCvtFToFV", 0>; +defm "" : LMULReadAdvanceFW<"ReadVFNCvtIToFV", 0>; +defm "" : LMULReadAdvanceW<"ReadVFNCvtFToIV", 0>; +defm "" : LMULReadAdvanceFW<"ReadVFNCvtFToFV", 0>; + +// 14. Vector Reduction Operations +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// 15. 
Vector Mask Instructions +defm "" : LMULReadAdvance<"ReadVMALUV", 0>; +defm "" : LMULReadAdvance<"ReadVMPopV", 0>; +defm "" : LMULReadAdvance<"ReadVMFFSV", 0>; +defm "" : LMULReadAdvance<"ReadVMSFSV", 0>; +defm "" : LMULReadAdvance<"ReadVMIotV", 0>; + +// 16. Vector Permutation Instructions +defm "" : LMULReadAdvance<"ReadVIMovVX", 0>; +defm "" : LMULReadAdvance<"ReadVIMovXV", 0>; +defm "" : LMULReadAdvance<"ReadVIMovXX", 0>; +defm "" : LMULReadAdvance<"ReadVFMovVF", 0>; +defm "" : LMULReadAdvance<"ReadVFMovFV", 0>; +defm "" : LMULReadAdvance<"ReadVFMovFX", 0>; +defm "" : LMULReadAdvance<"ReadVISlideV", 0>; +defm "" : LMULReadAdvance<"ReadVISlideX", 0>; +defm "" : LMULReadAdvance<"ReadVFSlideV", 0>; +defm "" : LMULReadAdvance<"ReadVFSlideF", 0>; +defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_data", 0>; +defm "" : LMULSEWReadAdvance<"ReadVRGatherVV_index", 0>; +defm "" : LMULReadAdvance<"ReadVRGatherVX_data", 0>; +defm "" : LMULReadAdvance<"ReadVRGatherVX_index", 0>; +defm "" : LMULReadAdvance<"ReadVRGatherVI_data", 0>; +defm "" : LMULSEWReadAdvance<"ReadVCompressV", 0>; +// LMUL Aware +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// Others +def : ReadAdvance; + //===----------------------------------------------------------------------===// // Unsupported extensions -defm : UnsupportedSchedV; defm : UnsupportedSchedZbc; defm : UnsupportedSchedZbs; defm : UnsupportedSchedZbkb; diff --git a/llvm/lib/Target/RISCV/RISCVScheduleV.td b/llvm/lib/Target/RISCV/RISCVScheduleV.td --- a/llvm/lib/Target/RISCV/RISCVScheduleV.td +++ b/llvm/lib/Target/RISCV/RISCVScheduleV.td @@ -9,7 +9,7 @@ //===----------------------------------------------------------------------===// /// Define scheduler resources associated with def operands. -defvar SchedMxList = ["M1", "M2", "M4", "M8", "MF2", "MF4", "MF8"]; +defvar SchedMxList = ["MF8", "MF4", "MF2", "M1", "M2", "M4", "M8"]; // Used for widening and narrowing instructions as it doesn't contain M8. 
defvar SchedMxListW = !listremove(SchedMxList, ["M8"]); defvar SchedMxListFW = !listremove(SchedMxList, ["M8", "MF8"]); @@ -38,6 +38,32 @@ !eq(mx, "MF4"): [16]); } +// Helper function to get the largest LMUL from MxList +// Precondition: MxList is sorted in ascending LMUL order. +class LargestLMUL MxList> { + // MX list is sorted from smallest to largest + string r = !foldl(!head(MxList), MxList, last, curr, curr); +} +// Helper function to get the smallest SEW that can be used with LMUL mx +// Precondition: MxList and the SchedSEWSet(F) lists are sorted ascending. +class SmallestSEW { + int r = !head(!if(isF, SchedSEWSetF.val, SchedSEWSet.val)); +} + +// Creates WriteRes for (name, mx, resources) tuple +multiclass LMULWriteResMX resources, + string mx, bit IsWorstCase> { + def : WriteRes(name # "_" # mx), resources>; + if IsWorstCase then + def : WriteRes(name # "_WorstCase"), resources>; +} +multiclass LMULSEWWriteResMXSEW resources, + string mx, int sew, bit IsWorstCase> { + def : WriteRes(name # "_" # mx # "_E" # sew), resources>; + if IsWorstCase then + def : WriteRes(name # "_WorstCase"), resources>; +} + // Define multiclasses to define SchedWrite, SchedRead, WriteRes, and // ReadAdvance for each (name, LMUL) pair and for each LMUL in each of the // SchedMxList variants above. Each multiclass is responsible for defining