diff --git a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
--- a/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
+++ b/llvm/lib/Target/RISCV/RISCVSchedSiFive7.td
@@ -160,6 +160,52 @@
   );
 }
 
+/// Cycles for reductions take approximately VL*SEW/DLEN + 5(4 + log(DLEN/SEW))
+/// cycles.
+///
+/// mx is the LMUL name ("MF8" .. "M8") and sew the element width; the cycle
+/// estimate is exposed as the field c.
+class SiFive7GetReductionCycles<string mx, int sew> {
+  // VLUpperBound*SEW/DLEN is equivalent to 2*LMUL since
+  // VLUpperBound=(VLEN*LMUL)/SEW.
+  defvar VLEN = 512;
+  defvar DLEN = !div(VLEN, 2);
+  // Fractional LMULs (MF2/MF4/MF8) all count as 1.
+  defvar TwoTimesLMUL = !cond(
+    !eq(mx, "M1") : 2,
+    !eq(mx, "M2") : 4,
+    !eq(mx, "M4") : 8,
+    !eq(mx, "M8") : 16,
+    !eq(mx, "MF2") : 1,
+    !eq(mx, "MF4") : 1,
+    !eq(mx, "MF8") : 1
+  );
+  // TwoTimesLMUL already is the VL*SEW/DLEN term. Dividing it by DLEN again
+  // would always yield 0 with integer division (at most 16 / 256).
+  int c = !add(
+    TwoTimesLMUL,
+    !mul(5, !add(4, !logtwo(!div(DLEN, sew))))
+  );
+}
+
+/// Cycles for ordered reductions take approximately 5*VL cycles, where VL is
+/// taken at its upper bound (VLEN * LMUL) / SEW. mx is the LMUL name, sew the
+/// element width; the cycle estimate is exposed as the field c.
+class SiFive7GetOrderedReductionCycles<string mx, int sew> {
+  defvar VLEN = 512;
+  // (VLEN * LMUL) / SEW
+  defvar VLUpperBound = !cond(
+    !eq(mx, "M1") : !div(VLEN, sew),
+    !eq(mx, "M2") : !div(!mul(VLEN, 2), sew),
+    !eq(mx, "M4") : !div(!mul(VLEN, 4), sew),
+    !eq(mx, "M8") : !div(!mul(VLEN, 8), sew),
+    !eq(mx, "MF2") : !div(!div(VLEN, 2), sew),
+    !eq(mx, "MF4") : !div(!div(VLEN, 4), sew),
+    !eq(mx, "MF8") : !div(!div(VLEN, 8), sew),
+  );
+  int c = !mul(5, VLUpperBound);
+}
+
 // SiFive7 machine model for scheduling and other instruction cost heuristics.
 def SiFive7Model : SchedMachineModel {
   let MicroOpBufferSize = 0; // Explicitly set to zero since SiFive7 is in-order.
@@ -730,14 +776,61 @@
 }
 
 // 14. Vector Reduction Operations
-let Latency = 32 in {
-defm "" : LMULSEWWriteRes<"WriteVIRedV_From", [SiFive7VA]>;
-defm "" : LMULSEWWriteRes<"WriteVIWRedV_From", [SiFive7VA]>;
-defm "" : LMULSEWWriteRes<"WriteVFRedV_From", [SiFive7VA]>;
-defm "" : LMULSEWWriteRes<"WriteVFRedOV_From", [SiFive7VA]>;
-defm "" : LMULSEWWriteResF<"WriteVFRedMinMaxV_From", [SiFive7VA]>;
-defm "" : LMULSEWWriteResFWRed<"WriteVFWRedV_From", [SiFive7VA]>;
-defm "" : LMULSEWWriteResFWRed<"WriteVFWRedOV_From", [SiFive7VA]>;
+// Every reduction write gets a per-(mx, sew) Latency and ResourceCycles on
+// SiFive7VA taken from the cycle estimates above; the *RedOV writes use the
+// ordered-reduction estimate.
+// NOTE(review): SchedSEWSet, SchedSEWSetF and SiFive7IsWorstCaseMXSEW are
+// defined outside this patch and are written here exactly as the patch has
+// them; confirm whether they expect template arguments.
+foreach mx = SchedMxList in {
+  foreach sew = SchedSEWSet.val in {
+    defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
+    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW.c;
+    let Latency = Cycles, ResourceCycles = [Cycles] in
+    defm "" : LMULSEWWriteResMXSEW<"WriteVIRedV_From", [SiFive7VA],
+                                   mx, sew, IsWorstCase>;
+  }
+}
+
+foreach mx = SchedMxListWRed in {
+  foreach sew = SchedSEWSet.val in {
+    defvar Cycles = SiFive7GetReductionCycles<mx, sew>.c;
+    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW.c;
+    let Latency = Cycles, ResourceCycles = [Cycles] in
+    defm "" : LMULSEWWriteResMXSEW<"WriteVIWRedV_From", [SiFive7VA],
+                                   mx, sew, IsWorstCase>;
+  }
+}
+
+foreach mx = SchedMxListF in {
+  foreach sew = SchedSEWSetF.val in {
+    defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
+    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW.c;
+    let Latency = RedCycles, ResourceCycles = [RedCycles] in {
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFRedV_From", [SiFive7VA],
+                                     mx, sew, IsWorstCase>;
+      defm "" : LMULSEWWriteResMXSEW<"WriteVFRedMinMaxV_From", [SiFive7VA],
+                                     mx, sew, IsWorstCase>;
+    }
+    defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
+    let Latency = OrdRedCycles, ResourceCycles = [OrdRedCycles] in
+    defm "" : LMULSEWWriteResMXSEW<"WriteVFRedOV_From", [SiFive7VA],
+                                   mx, sew, IsWorstCase>;
+  }
+}
+
+foreach mx = SchedMxListFWRed in {
+  foreach sew = SchedSEWSetF.val in {
+    defvar RedCycles = SiFive7GetReductionCycles<mx, sew>.c;
+    defvar IsWorstCase = SiFive7IsWorstCaseMXSEW.c;
+    let Latency = RedCycles, ResourceCycles = [RedCycles] in
+    defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedV_From", [SiFive7VA],
+                                   mx, sew, IsWorstCase>;
+    defvar OrdRedCycles = SiFive7GetOrderedReductionCycles<mx, sew>.c;
+    let Latency = OrdRedCycles, ResourceCycles = [OrdRedCycles] in
+    defm "" : LMULSEWWriteResMXSEW<"WriteVFWRedOV_From", [SiFive7VA],
+                                   mx, sew, IsWorstCase>;
+  }
 }
 
 // 15. Vector Mask Instructions