Index: llvm/lib/Target/ARM/ARMScheduleA57.td =================================================================== --- llvm/lib/Target/ARM/ARMScheduleA57.td +++ llvm/lib/Target/ARM/ARMScheduleA57.td @@ -282,7 +282,11 @@ // from similar μops, allowing a typical sequence of multiply-accumulate μops // to issue one every 1 cycle (sched advance = 2). def A57WriteMLA : SchedWriteRes<[A57UnitM]> { let Latency = 3; } -def A57WriteMLAL : SchedWriteRes<[A57UnitM]> { let Latency = 4; } +def A57WriteMLAL : SchedWriteVariant<[ + SchedVar, + SchedVar +]>; + def A57ReadMLA : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>; def : InstRW<[A57WriteMLA], Index: llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s =================================================================== --- llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s +++ llvm/test/tools/llvm-mca/ARM/cortex-a57-basic-instructions.s @@ -1421,9 +1421,9 @@ # CHECK-NEXT: 1 3 1.00 smladeq r2, r3, r5, r8 # CHECK-NEXT: 1 3 1.00 smladxhi r2, r3, r5, r8 # CHECK-NEXT: 2 4 2.00 smlal r2, r3, r5, r8 -# CHECK-NEXT: 2 4 2.00 smlals r2, r3, r5, r8 +# CHECK-NEXT: 4 5 2.00 smlals r2, r3, r5, r8 # CHECK-NEXT: 2 4 2.00 smlaleq r2, r3, r5, r8 -# CHECK-NEXT: 2 4 2.00 smlalshi r2, r3, r5, r8 +# CHECK-NEXT: 4 5 2.00 smlalshi r2, r3, r5, r8 # CHECK-NEXT: 2 4 2.00 smlalbb r3, r1, r9, r0 # CHECK-NEXT: 2 4 2.00 smlalbt r5, r6, r4, r1 # CHECK-NEXT: 2 4 2.00 smlaltb r4, r2, r3, r2 @@ -1634,12 +1634,12 @@ # CHECK-NEXT: 2 4 2.00 umaallt r3, r4, r5, r6 # CHECK-NEXT: 2 4 2.00 umlal r2, r4, r6, r8 # CHECK-NEXT: 2 4 2.00 umlalgt r6, r1, r2, r6 -# CHECK-NEXT: 2 4 2.00 umlals r2, r9, r2, r3 -# CHECK-NEXT: 2 4 2.00 umlalseq r3, r5, r1, r2 +# CHECK-NEXT: 4 5 2.00 umlals r2, r9, r2, r3 +# CHECK-NEXT: 4 5 2.00 umlalseq r3, r5, r1, r2 # CHECK-NEXT: 2 4 2.00 umull r2, r4, r6, r8 # CHECK-NEXT: 2 4 2.00 umullgt r6, r1, r2, r6 -# CHECK-NEXT: 2 4 2.00 umulls r2, r9, r2, r3 -# CHECK-NEXT: 2 4 2.00 umullseq r3, r5, r1, r2 +# CHECK-NEXT: 4 5 2.00 
umulls r2, r9, r2, r3 +# CHECK-NEXT: 4 5 2.00 umullseq r3, r5, r1, r2 # CHECK-NEXT: 1 2 1.00 uqadd16 r1, r2, r3 # CHECK-NEXT: 1 2 1.00 uqadd16gt r4, r7, r9 # CHECK-NEXT: 1 2 1.00 uqadd8 r3, r4, r8 @@ -1719,7 +1719,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1.0] [1.1] [2] [3] [4] [5] [6] -# CHECK-NEXT: 8.00 159.00 159.00 171.00 496.00 12.00 - - +# CHECK-NEXT: 8.00 165.00 165.00 211.00 456.00 12.00 - - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1.0] [1.1] [2] [3] [4] [5] [6] Instructions: @@ -2284,21 +2284,21 @@ # CHECK-NEXT: - - - - 1.00 - - - smladx r2, r3, r5, r8 # CHECK-NEXT: - - - - 1.00 - - - smladeq r2, r3, r5, r8 # CHECK-NEXT: - - - - 1.00 - - - smladxhi r2, r3, r5, r8 -# CHECK-NEXT: - - - - 2.00 - - - smlal r2, r3, r5, r8 -# CHECK-NEXT: - - - - 2.00 - - - smlals r2, r3, r5, r8 -# CHECK-NEXT: - - - - 2.00 - - - smlaleq r2, r3, r5, r8 -# CHECK-NEXT: - - - - 2.00 - - - smlalshi r2, r3, r5, r8 -# CHECK-NEXT: - - - - 2.00 - - - smlalbb r3, r1, r9, r0 -# CHECK-NEXT: - - - - 2.00 - - - smlalbt r5, r6, r4, r1 -# CHECK-NEXT: - - - - 2.00 - - - smlaltb r4, r2, r3, r2 -# CHECK-NEXT: - - - - 2.00 - - - smlaltt r8, r3, r8, r4 -# CHECK-NEXT: - - - - 2.00 - - - smlalbbge r3, r1, r9, r0 -# CHECK-NEXT: - - - - 2.00 - - - smlalbtle r5, r6, r4, r1 -# CHECK-NEXT: - - - - 2.00 - - - smlaltbne r4, r2, r3, r2 -# CHECK-NEXT: - - - - 2.00 - - - smlaltteq r8, r3, r8, r4 -# CHECK-NEXT: - - - - 2.00 - - - smlald r2, r3, r5, r8 +# CHECK-NEXT: - - - 2.00 - - - - smlal r2, r3, r5, r8 +# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - smlals r2, r3, r5, r8 +# CHECK-NEXT: - - - 2.00 - - - - smlaleq r2, r3, r5, r8 +# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - smlalshi r2, r3, r5, r8 +# CHECK-NEXT: - - - 2.00 - - - - smlalbb r3, r1, r9, r0 +# CHECK-NEXT: - - - 2.00 - - - - smlalbt r5, r6, r4, r1 +# CHECK-NEXT: - - - 2.00 - - - - smlaltb r4, r2, r3, r2 +# CHECK-NEXT: - - - 2.00 - - - - smlaltt r8, r3, r8, r4 +# CHECK-NEXT: - - - 2.00 - - - - smlalbbge r3, r1, r9, r0 +# 
CHECK-NEXT: - - - 2.00 - - - - smlalbtle r5, r6, r4, r1 +# CHECK-NEXT: - - - 2.00 - - - - smlaltbne r4, r2, r3, r2 +# CHECK-NEXT: - - - 2.00 - - - - smlaltteq r8, r3, r8, r4 +# CHECK-NEXT: - - - 2.00 - - - - smlald r2, r3, r5, r8 # CHECK-NEXT: - - - 2.00 - - - - smlaldx r2, r3, r5, r8 -# CHECK-NEXT: - - - - 2.00 - - - smlaldeq r2, r3, r5, r8 +# CHECK-NEXT: - - - 2.00 - - - - smlaldeq r2, r3, r5, r8 # CHECK-NEXT: - - - 2.00 - - - - smlaldxhi r2, r3, r5, r8 # CHECK-NEXT: - - - - 1.00 - - - smlawb r2, r3, r10, r8 # CHECK-NEXT: - - - - 1.00 - - - smlawt r8, r3, r5, r9 @@ -2308,9 +2308,9 @@ # CHECK-NEXT: - - - - 1.00 - - - smlsdx r2, r3, r5, r8 # CHECK-NEXT: - - - - 1.00 - - - smlsdeq r2, r3, r5, r8 # CHECK-NEXT: - - - - 1.00 - - - smlsdxhi r2, r3, r5, r8 -# CHECK-NEXT: - - - - 2.00 - - - smlsld r2, r9, r5, r1 +# CHECK-NEXT: - - - 2.00 - - - - smlsld r2, r9, r5, r1 # CHECK-NEXT: - - - 2.00 - - - - smlsldx r4, r11, r2, r8 -# CHECK-NEXT: - - - - 2.00 - - - smlsldeq r8, r2, r5, r6 +# CHECK-NEXT: - - - 2.00 - - - - smlsldeq r8, r2, r5, r6 # CHECK-NEXT: - - - 2.00 - - - - smlsldxhi r1, r0, r3, r8 # CHECK-NEXT: - - - - 1.00 - - - smmla r1, r2, r3, r4 # CHECK-NEXT: - - - - 1.00 - - - smmlar r4, r3, r2, r1 @@ -2494,16 +2494,16 @@ # CHECK-NEXT: - - - - 1.00 - - - uhsub16gt r4, r8, r2 # CHECK-NEXT: - - - - 1.00 - - - uhsub8 r4, r8, r2 # CHECK-NEXT: - - - - 1.00 - - - uhsub8gt r4, r8, r2 -# CHECK-NEXT: - - - - 2.00 - - - umaal r3, r4, r5, r6 -# CHECK-NEXT: - - - - 2.00 - - - umaallt r3, r4, r5, r6 -# CHECK-NEXT: - - - - 2.00 - - - umlal r2, r4, r6, r8 -# CHECK-NEXT: - - - - 2.00 - - - umlalgt r6, r1, r2, r6 -# CHECK-NEXT: - - - - 2.00 - - - umlals r2, r9, r2, r3 -# CHECK-NEXT: - - - - 2.00 - - - umlalseq r3, r5, r1, r2 -# CHECK-NEXT: - - - - 2.00 - - - umull r2, r4, r6, r8 -# CHECK-NEXT: - - - - 2.00 - - - umullgt r6, r1, r2, r6 -# CHECK-NEXT: - - - - 2.00 - - - umulls r2, r9, r2, r3 -# CHECK-NEXT: - - - - 2.00 - - - umullseq r3, r5, r1, r2 +# CHECK-NEXT: - - - 2.00 - - - - umaal 
r3, r4, r5, r6 +# CHECK-NEXT: - - - 2.00 - - - - umaallt r3, r4, r5, r6 +# CHECK-NEXT: - - - 2.00 - - - - umlal r2, r4, r6, r8 +# CHECK-NEXT: - - - 2.00 - - - - umlalgt r6, r1, r2, r6 +# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - umlals r2, r9, r2, r3 +# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - umlalseq r3, r5, r1, r2 +# CHECK-NEXT: - - - 2.00 - - - - umull r2, r4, r6, r8 +# CHECK-NEXT: - - - 2.00 - - - - umullgt r6, r1, r2, r6 +# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - umulls r2, r9, r2, r3 +# CHECK-NEXT: - 1.00 1.00 - 2.00 - - - umullseq r3, r5, r1, r2 # CHECK-NEXT: - - - - 1.00 - - - uqadd16 r1, r2, r3 # CHECK-NEXT: - - - - 1.00 - - - uqadd16gt r4, r7, r9 # CHECK-NEXT: - - - - 1.00 - - - uqadd8 r3, r4, r8 Index: llvm/utils/TableGen/CodeGenSchedule.cpp =================================================================== --- llvm/utils/TableGen/CodeGenSchedule.cpp +++ llvm/utils/TableGen/CodeGenSchedule.cpp @@ -1609,13 +1609,17 @@ // // This is one step in a breadth-first search of nested variants. void PredTransitions::substituteVariants(const PredTransition &Trans) { + auto NewTrans = [&]() { + unsigned I = TransVec.size(); + TransVec.emplace_back(); + TransVec.back().PredTerm = Trans.PredTerm; + TransVec.back().ProcIndices = Trans.ProcIndices; + return I; + }; + // Build up a set of partial results starting at the back of // PredTransitions. Remember the first new transition. - unsigned StartIdx = TransVec.size(); - TransVec.emplace_back(); - TransVec.back().PredTerm = Trans.PredTerm; - TransVec.back().ProcIndices = Trans.ProcIndices; - + unsigned StartIdx = NewTrans(); // Visit each original write sequence. for (SmallVectorImpl>::const_iterator WSI = Trans.WriteSequences.begin(), WSE = Trans.WriteSequences.end(); @@ -1627,6 +1631,12 @@ } substituteVariantOperand(*WSI, /*IsRead=*/false, StartIdx); } + // We may have both read and write variant in the same sched class. 
+ // If we've previously created a valid write variant transition then create + // new empty transition for read variant, because read and write transitions + // may belong to different processor models. + if (!Trans.ProcIndices[0] && TransVec[StartIdx].ProcIndices[0]) + StartIdx = NewTrans(); // Visit each original read sequence. for (SmallVectorImpl>::const_iterator RSI = Trans.ReadSequences.begin(), RSE = Trans.ReadSequences.end(); @@ -1638,6 +1648,14 @@ } substituteVariantOperand(*RSI, /*IsRead=*/true, StartIdx); } + // If we haven't substituted any operand then just remove the previously + // created empty transition. + // TODO: Merge read and write transitions if they share the same + // processor model. + if (!TransVec[StartIdx].ProcIndices[0]) { + assert(StartIdx == TransVec.size() - 1); + TransVec.pop_back(); + } } static void addSequences(CodeGenSchedModels &SchedModels,