Index: include/llvm/CodeGen/TargetSchedule.h
===================================================================
--- include/llvm/CodeGen/TargetSchedule.h
+++ include/llvm/CodeGen/TargetSchedule.h
@@ -93,6 +93,10 @@
   /// \brief Maximum number of micro-ops that may be scheduled per cycle.
   unsigned getIssueWidth() const { return SchedModel.IssueWidth; }
 
+  /// \brief Return true if MI cannot be dual-issued with another instruction.
+  bool isSingleIssue(const MachineInstr *MI,
+                     const MCSchedClassDesc *SC = nullptr) const;
+
   /// \brief Return the number of issue slots required for this MI.
   unsigned getNumMicroOps(const MachineInstr *MI,
                           const MCSchedClassDesc *SC = nullptr) const;
@@ -178,6 +182,7 @@
                                bool UseDefaultDefLatency = true) const;
   unsigned computeInstrLatency(unsigned Opcode) const;
 
+
   /// \brief Output dependency latency of a pair of defs of the same register.
   ///
   /// This is typically one cycle.
Index: include/llvm/MC/MCSchedule.h
===================================================================
--- include/llvm/MC/MCSchedule.h
+++ include/llvm/MC/MCSchedule.h
@@ -106,6 +106,7 @@
   const char* Name;
 #endif
   unsigned short NumMicroOps;
+  bool SingleIssue;
   bool BeginGroup;
   bool EndGroup;
   unsigned WriteProcResIdx; // First index into WriteProcResTable.
Index: include/llvm/Target/TargetSchedule.td
===================================================================
--- include/llvm/Target/TargetSchedule.td
+++ include/llvm/Target/TargetSchedule.td
@@ -255,6 +255,8 @@
   // Allow a processor to mark some scheduling classes as unsupported
   // for stronger verification.
   bit Unsupported = 0;
+  // Allow a processor to mark some scheduling classes as single-issue.
+  bit SingleIssue = 0;
   SchedMachineModel SchedModel = ?;
 }
 
Index: lib/CodeGen/MachineScheduler.cpp
===================================================================
--- lib/CodeGen/MachineScheduler.cpp
+++ lib/CodeGen/MachineScheduler.cpp
@@ -1133,6 +1133,11 @@
         dbgs() << " Pressure Diff : ";
         getPressureDiff(&SU).dump(*TRI);
       }
+      dbgs() << " Single Issue : ";
+      if (SchedModel.isSingleIssue(SU.getInstr()))
+        dbgs() << "true;";
+      else
+        dbgs() << "false;";
       dbgs() << '\n';
     }
     if (ExitSU.getInstr() != nullptr)
@@ -1864,12 +1869,20 @@
       && HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard) {
     return true;
   }
+
+  if ((CurrMOps > 0) && SchedModel->isSingleIssue(SU->getInstr())) {
+    DEBUG(dbgs() << " SU(" << SU->NodeNum
+          << ") not issued (single issue instruction)\n");
+    return true;
+  }
+
   unsigned uops = SchedModel->getNumMicroOps(SU->getInstr());
   if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) {
     DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops="
           << SchedModel->getNumMicroOps(SU->getInstr()) << '\n');
     return true;
   }
+
   if (SchedModel->hasInstrSchedModel() && SU->hasReservedResource) {
     const MCSchedClassDesc *SC = DAG->getSchedClass(SU);
     for (TargetSchedModel::ProcResIter
Index: lib/CodeGen/TargetSchedule.cpp
===================================================================
--- lib/CodeGen/TargetSchedule.cpp
+++ lib/CodeGen/TargetSchedule.cpp
@@ -73,6 +73,18 @@
   }
 }
 
+/// Returns true only when MI's valid scheduling class has SingleIssue set.
+bool TargetSchedModel::isSingleIssue(const MachineInstr *MI,
+                                     const MCSchedClassDesc *SC) const {
+  if (hasInstrSchedModel()) {
+    if (!SC)
+      SC = resolveSchedClass(MI);
+    if (SC->isValid())
+      return SC->SingleIssue;
+  }
+  return false;
+}
+
 unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI,
                                           const MCSchedClassDesc *SC) const {
   if (hasInstrItineraries()) {
Index: lib/Target/ARM/ARMScheduleR52.td
===================================================================
--- lib/Target/ARM/ARMScheduleR52.td
+++ lib/Target/ARM/ARMScheduleR52.td
@@ -145,7 +145,7 @@
   let Latency = 4; let NumMicroOps = 0;
 }
 def R52WriteDIV : SchedWriteRes<[R52UnitDiv]> {
-  let Latency = 8; let ResourceCycles = [8]; // not pipelined
+  let Latency = 8; let ResourceCycles = [8]; // not pipelined
 }
 def R52WriteLd : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
 def R52WriteST : SchedWriteRes<[R52UnitLd]> { let Latency = 4; }
@@ -261,7 +261,6 @@
       "SMULTB", "SMULTT", "SMULWB", "SMULWT", "SMUSD", "SMUSDXi", "t2MUL",
       "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB", "t2SMULTT",
       "t2SMULWB", "t2SMULWT", "t2SMUSD")>;
-
 // Multiply Accumulate
 // Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs).
 // The store pipeline is used partly for 64-bit operations.
@@ -727,16 +726,19 @@
   let Latency = 6;
   let NumMicroOps = 3;
   let ResourceCycles = [2];
+  let SingleIssue = 1;
 }
 def R52WriteVLD3Mem : SchedWriteRes<[R52UnitLd]> {
   let Latency = 7;
   let NumMicroOps = 5;
   let ResourceCycles = [3];
+  let SingleIssue = 1;
 }
 def R52WriteVLD4Mem : SchedWriteRes<[R52UnitLd]> {
   let Latency = 8;
   let NumMicroOps = 7;
   let ResourceCycles = [4];
+  let SingleIssue = 1;
 }
 def R52WriteVST1Mem : SchedWriteRes<[R52UnitLd]> {
   let Latency = 5;
Index: test/CodeGen/ARM/single-issue-r52.mir
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/single-issue-r52.mir
@@ -0,0 +1,79 @@
+# RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=cortex-r52 -run-pass machine-scheduler -enable-misched -debug-only=misched 2>&1 | FileCheck %s
+# REQUIRES: asserts
+# Verify that the machine scheduler's debug dump for Cortex-R52 reports the
+# VLD4 load as single-issue, while the following VADD and VMOVRRD are not.
+--- |
+  ; ModuleID = 'foo.ll'
+  source_filename = "foo.ll"
+  target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "arm---eabi"
+
+  %struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> }
+  ; Function Attrs: nounwind
+  define <8 x i8> @foo(i8* %A) {
+    %tmp1 = call %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8.p0i8(i8* %A, i32 8)
+    %tmp2 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 0
+    %tmp3 = extractvalue %struct.__neon_int8x8x4_t %tmp1, 1
+    %tmp4 = add <8 x i8> %tmp2, %tmp3
+    ret <8 x i8> %tmp4
+  }
+  declare %struct.__neon_int8x8x4_t @llvm.arm.neon.vld4.v8i8.p0i8(i8*, i32)
+
+# CHECK: ********** MI Scheduling **********
+# CHECK: ScheduleDAGMILive::schedule starting
+# CHECK: SU(1): %vreg1 = VLD4d8Pseudo %vreg0, 8, pred:14, pred:%noreg; mem:LD32[%A](align=8) QQPR:%vreg1 GPR:%vreg0
+# CHECK: Latency : 8
+# CHECK: Single Issue : true;
+# CHECK: SU(2): %vreg4 = VADDv8i8 %vreg1:dsub_0, %vreg1:dsub_1, pred:14, pred:%noreg; DPR:%vreg4 QQPR:%vreg1
+# CHECK: Latency : 5
+# CHECK: Single Issue : false;
+# CHECK: SU(3): %vreg5, %vreg6 = VMOVRRD %vreg4, pred:14, pred:%noreg; GPR:%vreg5,%vreg6 DPR:%vreg4
+# CHECK: Latency : 4
+# CHECK: Single Issue : false;
+
+...
+---
+name:            foo
+alignment:       2
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gpr }
+  - { id: 1, class: qqpr }
+  - { id: 2, class: dpr }
+  - { id: 3, class: dpr }
+  - { id: 4, class: dpr }
+  - { id: 5, class: gpr }
+  - { id: 6, class: gpr }
+liveins:
+  - { reg: '%r0', virtual-reg: '%0' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    0
+  adjustsStack:    false
+  hasCalls:        false
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+body:             |
+  bb.0 (%ir-block.0):
+    liveins: %r0
+
+    %0 = COPY %r0
+    %1 = VLD4d8Pseudo %0, 8, 14, _ :: (load 32 from %ir.A, align 8)
+    %4 = VADDv8i8 %1.dsub_0, %1.dsub_1, 14, _
+    %5, %6 = VMOVRRD %4, 14, _
+    %r0 = COPY %5
+    %r1 = COPY %6
+    BX_RET 14, _, implicit %r0, implicit killed %r1
+
+...
Index: utils/TableGen/SubtargetEmitter.cpp
===================================================================
--- utils/TableGen/SubtargetEmitter.cpp
+++ utils/TableGen/SubtargetEmitter.cpp
@@ -812,6 +812,7 @@
     MCSchedClassDesc &SCDesc = SCTab.back();
     // SCDesc.Name is guarded by NDEBUG
     SCDesc.NumMicroOps = 0;
+    SCDesc.SingleIssue = false;
     SCDesc.BeginGroup = false;
     SCDesc.EndGroup = false;
     SCDesc.WriteProcResIdx = 0;
@@ -915,6 +916,7 @@
         }
         WLEntry.Cycles += WriteRes->getValueAsInt("Latency");
         SCDesc.NumMicroOps += WriteRes->getValueAsInt("NumMicroOps");
+        SCDesc.SingleIssue |= WriteRes->getValueAsBit("SingleIssue");
         SCDesc.BeginGroup |= WriteRes->getValueAsBit("BeginGroup");
         SCDesc.EndGroup |= WriteRes->getValueAsBit("EndGroup");
 
@@ -1105,7 +1107,7 @@
     std::vector<MCSchedClassDesc> &SCTab =
       SchedTables.ProcSchedClasses[1 + (PI - SchedModels.procModelBegin())];
 
-    OS << "\n// {Name, NumMicroOps, BeginGroup, EndGroup,"
+    OS << "\n// {Name, NumMicroOps, SingleIssue, BeginGroup, EndGroup,"
        << " WriteProcResIdx,#, WriteLatencyIdx,#, ReadAdvanceIdx,#}\n";
     OS << "static const llvm::MCSchedClassDesc "
        << PI->ModelName << "SchedClasses[] = {\n";
@@ -1116,7 +1118,7 @@
            && "invalid class not first");
     OS << " {DBGFIELD(\"InvalidSchedClass\") "
        << MCSchedClassDesc::InvalidNumMicroOps
-       << ", false, false, 0, 0, 0, 0, 0, 0},\n";
+       << ", false, false, false, 0, 0, 0, 0, 0, 0},\n";
 
     for (unsigned SCIdx = 1, SCEnd = SCTab.size(); SCIdx != SCEnd; ++SCIdx) {
       MCSchedClassDesc &MCDesc = SCTab[SCIdx];
@@ -1125,6 +1127,7 @@
       if (SchedClass.Name.size() < 18)
         OS.indent(18 - SchedClass.Name.size());
       OS << MCDesc.NumMicroOps
+         << ", " << (MCDesc.SingleIssue ? "true" : "false")
          << ", " << ( MCDesc.BeginGroup ? "true" : "false" )
          << ", " << ( MCDesc.EndGroup ? "true" : "false" )
          << ", " << format("%2d", MCDesc.WriteProcResIdx)