diff --git a/llvm/lib/Target/ARM/ARMScheduleM7.td b/llvm/lib/Target/ARM/ARMScheduleM7.td --- a/llvm/lib/Target/ARM/ARMScheduleM7.td +++ b/llvm/lib/Target/ARM/ARMScheduleM7.td @@ -19,6 +19,8 @@ let CompleteModel = 0; } +let SchedModel = CortexM7Model in { + //===--------------------------------------------------------------------===// // The Cortex-M7 has two ALU, two LOAD, a STORE, a MAC, a BRANCH and a VFP // pipe. The stages relevant to scheduling are as follows: @@ -33,7 +35,9 @@ // for scheduling, so simple ALU operations executing in EX2 will have // ReadAdvance<0> (the default) for their source operands and Latency = 1. -def M7UnitLoad : ProcResource<2> { let BufferSize = 0; } +def M7UnitLoadL : ProcResource<1> { let BufferSize = 0; } +def M7UnitLoadH : ProcResource<1> { let BufferSize = 0; } +def M7UnitLoad : ProcResGroup<[M7UnitLoadL,M7UnitLoadH]> { let BufferSize = 0; } def M7UnitStore : ProcResource<1> { let BufferSize = 0; } def M7UnitALU : ProcResource<2>; def M7UnitShift1 : ProcResource<1> { let BufferSize = 0; } @@ -41,14 +45,14 @@ def M7UnitMAC : ProcResource<1> { let BufferSize = 0; } def M7UnitBranch : ProcResource<1> { let BufferSize = 0; } def M7UnitVFP : ProcResource<1> { let BufferSize = 0; } -def M7UnitVPort : ProcResource<2> { let BufferSize = 0; } +def M7UnitVPortL : ProcResource<1> { let BufferSize = 0; } +def M7UnitVPortH : ProcResource<1> { let BufferSize = 0; } +def M7UnitVPort : ProcResGroup<[M7UnitVPortL,M7UnitVPortH]> { let BufferSize = 0; } def M7UnitSIMD : ProcResource<1> { let BufferSize = 0; } //===---------------------------------------------------------------------===// // Subtarget-specific SchedWrite types with map ProcResources and set latency. -let SchedModel = CortexM7Model in { - def : WriteRes { let Latency = 1; } // Basic ALU with shifts. @@ -105,39 +109,42 @@ // Floating point conversions. def : WriteRes { let Latency = 3; } def : WriteRes { let Latency = 3; } +def M7WriteFPMOV64 : SchedWriteRes<[M7UnitVPortL, M7UnitVPortH]> { + let Latency = 3; +} // The FP pipeline has a latency of 3 cycles. // ALU operations (32/64-bit). These go down the FP pipeline. def : WriteRes { let Latency = 3; } -def : WriteRes { +def : WriteRes { let Latency = 4; let BeginGroup = 1; } // Multiplication def : WriteRes { let Latency = 3; } -def : WriteRes { +def : WriteRes { let Latency = 7; let BeginGroup = 1; } // Multiply-accumulate. FPMAC goes down the FP Pipeline. def : WriteRes { let Latency = 6; } -def : WriteRes { +def : WriteRes { let Latency = 11; let BeginGroup = 1; } // Division. Effective scheduling latency is 3, though real latency is larger def : WriteRes { let Latency = 16; } -def : WriteRes { +def : WriteRes { let Latency = 30; let BeginGroup = 1; } // Square-root. Effective scheduling latency is 3; real latency is larger def : WriteRes { let Latency = 16; } -def : WriteRes { +def : WriteRes { let Latency = 30; let BeginGroup = 1; } @@ -283,12 +290,12 @@ // VFP loads and stores def M7LoadSP : SchedWriteRes<[M7UnitLoad, M7UnitVPort]> { let Latency = 1; } -def M7LoadDP : SchedWriteRes<[M7UnitLoad, M7UnitVPort, M7UnitVPort]> { +def M7LoadDP : SchedWriteRes<[M7UnitLoadL, M7UnitLoadH, M7UnitVPortL, M7UnitVPortH]> { let Latency = 2; let SingleIssue = 1; } def M7StoreSP : SchedWriteRes<[M7UnitStore, M7UnitVPort]>; -def M7StoreDP : SchedWriteRes<[M7UnitStore, M7UnitVPort, M7UnitVPort]> { +def M7StoreDP : SchedWriteRes<[M7UnitStore, M7UnitVPortL, M7UnitVPortH]> { let SingleIssue = 1; } diff --git a/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir b/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir @@ -0,0 +1,39 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple arm-arm-eabi -mcpu=cortex-m7 -verify-machineinstrs -run-pass=postmisched %s -o - | FileCheck %s +--- +name: test_groups +alignment: 2 +tracksRegLiveness: true +liveins: + - { reg: '$d0' } + - { reg: '$r0' } + - { reg: '$r1' } + - { reg: '$r2' } + - { reg: '$r3' } + - { reg: '$r4' } +frameInfo: + maxAlignment: 1 + maxCallFrameSize: 0 +machineFunctionInfo: {} +body: | + bb.0: + liveins: $d0, $r0, $r1, $r2, $r3, $r4 + + ; CHECK-LABEL: name: test_groups + ; CHECK: liveins: $d0, $r0, $r1, $r2, $r3, $r4 + ; CHECK: renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg + ; CHECK: renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg + ; CHECK: VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg + ; CHECK: renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg + ; CHECK: t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg + ; CHECK: renamable $r4 = t2ADDrr killed renamable $r4, renamable $r4, 14 /* CC::al */, $noreg, $noreg + ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit killed $d0 + renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg + renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg + VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg + renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg + t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg + renamable $r4 = t2ADDrr killed renamable $r4, renamable $r4, 14 /* CC::al */, $noreg, $noreg + tBX_RET 14 /* CC::al */, $noreg, implicit $d0 + +... diff --git a/llvm/test/tools/llvm-mca/ARM/m7-fp.s b/llvm/test/tools/llvm-mca/ARM/m7-fp.s --- a/llvm/test/tools/llvm-mca/ARM/m7-fp.s +++ b/llvm/test/tools/llvm-mca/ARM/m7-fp.s @@ -253,23 +253,23 @@ # CHECK-NEXT: [0.0] - M7UnitALU # CHECK-NEXT: [0.1] - M7UnitALU # CHECK-NEXT: [1] - M7UnitBranch -# CHECK-NEXT: [2.0] - M7UnitLoad -# CHECK-NEXT: [2.1] - M7UnitLoad -# CHECK-NEXT: [3] - M7UnitMAC -# CHECK-NEXT: [4] - M7UnitSIMD -# CHECK-NEXT: [5] - M7UnitShift1 -# CHECK-NEXT: [6] - M7UnitShift2 -# CHECK-NEXT: [7] - M7UnitStore -# CHECK-NEXT: [8] - M7UnitVFP -# CHECK-NEXT: [9.0] - M7UnitVPort -# CHECK-NEXT: [9.1] - M7UnitVPort +# CHECK-NEXT: [2] - M7UnitLoadH +# CHECK-NEXT: [3] - M7UnitLoadL +# CHECK-NEXT: [4] - M7UnitMAC +# CHECK-NEXT: [5] - M7UnitSIMD +# CHECK-NEXT: [6] - M7UnitShift1 +# CHECK-NEXT: [7] - M7UnitShift2 +# CHECK-NEXT: [8] - M7UnitStore +# CHECK-NEXT: [9] - M7UnitVFP +# CHECK-NEXT: [10] - M7UnitVPortH +# CHECK-NEXT: [11] - M7UnitVPortL # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1] -# CHECK-NEXT: - - - 1.00 1.00 - - - - 2.00 104.00 81.00 81.00 +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] +# CHECK-NEXT: - - - 1.50 1.50 - - - - 2.00 104.00 81.00 81.00 # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1] Instructions: +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: # CHECK-NEXT: - - - - - - - - - - 1.00 0.50 0.50 vabs.f32 s0, s2 # CHECK-NEXT: - - - - - - - - - - 1.00 1.00 1.00 vabs.f64 d0, d2 # CHECK-NEXT: - - - - - - - - - - 1.00 0.50 0.50 vadd.f32 s0, s2, s1 @@ -384,7 +384,7 @@ # CHECK-NEXT: - - - - - - - - - - 1.00 1.00 1.00 vsqrt.f64 d0, d2 # CHECK-NEXT: - - - - - - - - - - 1.00 0.50 0.50 vsub.f32 s0, s2, s1 # CHECK-NEXT: - - - - - - - - - - 1.00 1.00 1.00 vsub.f64 d0, d2, d1 -# CHECK-NEXT: - - - 0.50 0.50 - - - - - - 1.00 1.00 vldr d0, [r0] +# CHECK-NEXT: - - - 1.00 1.00 - - - - - - 1.00 1.00 vldr d0, [r0] # CHECK-NEXT: - - - 0.50 0.50 - - - - - - 0.50 0.50 vldr s0, [r0] # CHECK-NEXT: - - - - - - - - - 1.00 - 1.00 1.00 vstr d0, [r0] # CHECK-NEXT: - - - - - - - - - 1.00 - 0.50 0.50 vstr s0, [r0] diff --git a/llvm/test/tools/llvm-mca/ARM/m7-int.s b/llvm/test/tools/llvm-mca/ARM/m7-int.s --- a/llvm/test/tools/llvm-mca/ARM/m7-int.s +++ b/llvm/test/tools/llvm-mca/ARM/m7-int.s @@ -862,23 +862,23 @@ # CHECK-NEXT: [0.0] - M7UnitALU # CHECK-NEXT: [0.1] - M7UnitALU # CHECK-NEXT: [1] - M7UnitBranch -# CHECK-NEXT: [2.0] - M7UnitLoad -# CHECK-NEXT: [2.1] - M7UnitLoad -# CHECK-NEXT: [3] - M7UnitMAC -# CHECK-NEXT: [4] - M7UnitSIMD -# CHECK-NEXT: [5] - M7UnitShift1 -# CHECK-NEXT: [6] - M7UnitShift2 -# CHECK-NEXT: [7] - M7UnitStore -# CHECK-NEXT: [8] - M7UnitVFP -# CHECK-NEXT: [9.0] - M7UnitVPort -# CHECK-NEXT: [9.1] - M7UnitVPort +# CHECK-NEXT: [2] - M7UnitLoadH +# CHECK-NEXT: [3] - M7UnitLoadL +# CHECK-NEXT: [4] - M7UnitMAC +# CHECK-NEXT: [5] - M7UnitSIMD +# CHECK-NEXT: [6] - M7UnitShift1 +# CHECK-NEXT: [7] - M7UnitShift2 +# CHECK-NEXT: [8] - M7UnitStore +# CHECK-NEXT: [9] - M7UnitVFP +# CHECK-NEXT: [10] - M7UnitVPortH +# CHECK-NEXT: [11] - M7UnitVPortL # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1] +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] # CHECK-NEXT: 125.00 125.00 - 35.00 35.00 43.00 90.00 88.00 2.00 45.00 - - - # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1] Instructions: +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - adc r0, r1, #0 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - adcs r0, r1, #0 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - adcs r0, r1 diff --git a/llvm/test/tools/llvm-mca/ARM/m7-negative-readadvance.s b/llvm/test/tools/llvm-mca/ARM/m7-negative-readadvance.s --- a/llvm/test/tools/llvm-mca/ARM/m7-negative-readadvance.s +++ b/llvm/test/tools/llvm-mca/ARM/m7-negative-readadvance.s @@ -34,26 +34,26 @@ # CHECK-NEXT: [0.0] - M7UnitALU # CHECK-NEXT: [0.1] - M7UnitALU # CHECK-NEXT: [1] - M7UnitBranch -# CHECK-NEXT: [2.0] - M7UnitLoad -# CHECK-NEXT: [2.1] - M7UnitLoad -# CHECK-NEXT: [3] - M7UnitMAC -# CHECK-NEXT: [4] - M7UnitSIMD -# CHECK-NEXT: [5] - M7UnitShift1 -# CHECK-NEXT: [6] - M7UnitShift2 -# CHECK-NEXT: [7] - M7UnitStore -# CHECK-NEXT: [8] - M7UnitVFP -# CHECK-NEXT: [9.0] - M7UnitVPort -# CHECK-NEXT: [9.1] - M7UnitVPort +# CHECK-NEXT: [2] - M7UnitLoadH +# CHECK-NEXT: [3] - M7UnitLoadL +# CHECK-NEXT: [4] - M7UnitMAC +# CHECK-NEXT: [5] - M7UnitSIMD +# CHECK-NEXT: [6] - M7UnitShift1 +# CHECK-NEXT: [7] - M7UnitShift2 +# CHECK-NEXT: [8] - M7UnitStore +# CHECK-NEXT: [9] - M7UnitVFP +# CHECK-NEXT: [10] - M7UnitVPortH +# CHECK-NEXT: [11] - M7UnitVPortL # CHECK: Resource pressure per iteration: -# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1] -# CHECK-NEXT: 1.00 1.00 - - 1.00 - - - - - - - 2.00 +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] +# CHECK-NEXT: 1.00 1.00 - 1.00 1.00 - - - - - - 1.00 1.00 # CHECK: Resource pressure by instruction: -# CHECK-NEXT: [0.0] [0.1] [1] [2.0] [2.1] [3] [4] [5] [6] [7] [8] [9.0] [9.1] Instructions: +# CHECK-NEXT: [0.0] [0.1] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: # CHECK-NEXT: - 1.00 - - - - - - - - - - - add.w r1, r1, #1 # CHECK-NEXT: 1.00 - - - - - - - - - - - - add.w r1, r1, #2 -# CHECK-NEXT: - - - - 1.00 - - - - - - - 2.00 vldr d0, [r1] +# CHECK-NEXT: - - - 1.00 1.00 - - - - - - 1.00 1.00 vldr d0, [r1] # CHECK: Timeline view: # CHECK-NEXT: Index 012345