Neoverse V2 scheduling model: give zeroing/register moves a zero-latency,
resource-free write when the NeoverseZeroMove predicate matches, and fall back
to the previous write otherwise.

Review notes on this copy of the patch (text before the first "diff --git"
line is not part of any hunk):

  * Line structure was rebuilt from a whitespace-collapsed copy. Every hunk's
    line counts match its @@ header, but the position of blank context lines
    and all intra-line alignment are reconstructions. Expect to need a
    whitespace-tolerant apply, and re-check blank-line placement against the
    tree.
  * The arguments of the six SchedVar entries added for V2Write_0or1cyc_1I,
    V2Write_0or2cyc_1V and V2Write_0or3cyc_1M0 were missing. They are filled
    in from the write names, the predicate defined in this patch, and the
    CHECK expectations below (latency 0 / no resources for zero moves, the old
    latency otherwise). NOTE(review): confirm against the original change.
  * Text that appears to have been lost and could NOT be recovered from this
    copy is left exactly as found. Restore it from the tree before applying:
      - hunk -900:  SchedVar arguments on the two leading context lines and on
                    the two context lines of V2Write_2or3cyc_1M;
      - hunk -1092: arguments of "def : SchedAlias" on the removed line, the
                    added line and the trailing context line;
      - hunk -1318: arguments of the leading "def : SchedAlias" context line;
      - hunk -1662: the "Handled by SchedAlias" comment may also be truncated.
  * The header comment for V2Write_0cyc said "0 micro-op"; the CHECK lines in
    this patch report 1 uop for these instructions, so the comment now
    describes the write as zero-latency instead.
  * The CHECK lines are marked autogenerated by utils/update_mca_test_checks.py;
    regenerate them with that script rather than trusting the column spacing
    shown here.

diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
--- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td
@@ -88,6 +88,10 @@
 // Define customized scheduler read/write types specific to the Neoverse V2.
 
 //===----------------------------------------------------------------------===//
+
+// Define generic zero-latency types (no resources; counted as 1 micro-op)
+def V2Write_0cyc : SchedWriteRes<[]> { let Latency = 0; }
+
 // Define generic 1 micro-op types
 
 def V2Write_1cyc_1B : SchedWriteRes<[V2UnitB]> { let Latency = 1; }
@@ -900,6 +904,18 @@
                       SchedVar,
                       SchedVar]>;
 
+def V2Write_0or1cyc_1I : SchedWriteVariant<[
+                           SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
+                           SchedVar<NoSchedPred,      [V2Write_1cyc_1I]>]>;
+
+def V2Write_0or2cyc_1V : SchedWriteVariant<[
+                           SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
+                           SchedVar<NoSchedPred,      [V2Write_2cyc_1V]>]>;
+
+def V2Write_0or3cyc_1M0 : SchedWriteVariant<[
+                            SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
+                            SchedVar<NoSchedPred,      [V2Write_3cyc_1M0]>]>;
+
 def V2Write_2or3cyc_1M : SchedWriteVariant<[
                       SchedVar,
                       SchedVar]>;
@@ -1092,9 +1108,9 @@
 
 // ALU, basic
 // ALU, basic, flagset
-def : SchedAlias;
-def : InstRW<[V2Write_1cyc_1F],
-             (instregex "^(ADC|SBC)S[WX]r$")>;
+def : SchedAlias;
+def : InstRW<[V2Write_1cyc_1F], (instregex "^(ADC|SBC)S[WX]r$")>;
+def : InstRW<[V2Write_0or1cyc_1I], (instregex "^MOVZ[WX]i$")>;
 
 // ALU, extend and shift
 def : SchedAlias;
@@ -1122,8 +1138,8 @@
 def : InstRW<[V2Write_1cyc_1I], (instrs GMI, SUBP, SUBPS)>;
 
 // Logical, shift, no flagset
-def : InstRW<[V2Write_1cyc_1I],
-             (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
+def : InstRW<[V2Write_1cyc_1I], (instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
+def : InstRW<[V2Write_0or1cyc_1I], (instregex "^ORR[WX]rs$")>;
 
 // Logical, shift, flagset
 def : InstRW<[V2Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>;
@@ -1318,7 +1334,8 @@
 def : SchedAlias;
 
 // FP transfer, from gen to low half of vec reg
-def : InstRW<[V2Write_3cyc_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
+def : InstRW<[V2Write_0or3cyc_1M0],
+             (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
 
 // FP transfer, from gen to high half of vec reg
 def : InstRW<[V2Write_5cyc_1M0_1V], (instrs FMOVXDHighr)>;
@@ -1662,6 +1679,7 @@
 // ASIMD transpose
 // ASIMD unzip/zip
 // Handled by SchedAlias
+def : InstRW<[V2Write_0or2cyc_1V], (instrs MOVID, MOVIv2d_ns)>;
 
 // ASIMD duplicate, gen reg
 def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUPv.+gpr")>;
diff --git a/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td b/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
--- a/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedPredNeoverse.td
@@ -54,3 +54,30 @@
                                          SMADDLrrr, UMADDLrrr,
                                          SMSUBLrrr, UMSUBLrrr]>,
                                       CheckIsReg3Zero]>>;
+
+def NeoverseZeroMove : MCSchedPredicate<
+                         CheckAny<[
+                           // MOV Wd, #0
+                           // MOV Xd, #0
+                           CheckAll<[CheckOpcode<[MOVZWi, MOVZXi]>,
+                                     CheckAll<[CheckImmOperand<1, 0>,
+                                               CheckImmOperand<2, 0>]>]>,
+                           // MOV Wd, WZR
+                           // MOV Xd, XZR
+                           // MOV Wd, Wn
+                           // MOV Xd, Xn
+                           CheckAll<[CheckOpcode<[ORRWrs, ORRXrs]>,
+                                     CheckAll<[CheckIsReg1Zero,
+                                               CheckImmOperand<3, 0>]>]>,
+                           // FMOV Hd, WZR
+                           // FMOV Hd, XZR
+                           // FMOV Sd, WZR
+                           // FMOV Dd, XZR
+                           CheckAll<[CheckOpcode<[FMOVWHr, FMOVXHr,
+                                                  FMOVWSr, FMOVXDr]>,
+                                     CheckIsReg1Zero]>,
+                           // MOVI Dd, #0
+                           // MOVI Vd.2D, #0
+                           CheckAll<[CheckOpcode<[MOVID, MOVIv2d_ns]>,
+                                     CheckImmOperand<1, 0>]>
+                         ]>>;
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-basic-instructions.s
@@ -2532,14 +2532,14 @@
 # CHECK-NEXT: 1 2 0.50 bics x3, xzr, x3, lsl #1
 # CHECK-NEXT: 1 2 0.50 tst w3, w7, lsl #31
 # CHECK-NEXT: 1 2 0.50 tst x2, x20, asr #2
-# CHECK-NEXT: 1 1 0.17 mov x3, x6
-# CHECK-NEXT: 1 1 0.17 mov x3, xzr
-# CHECK-NEXT: 1 1 0.17 mov wzr, w2
-# CHECK-NEXT: 1 1 0.17 mov w3, w5
+# CHECK-NEXT: 1 0 0.06 mov x3, x6
+# CHECK-NEXT: 1 0 0.06 mov x3, xzr
+# CHECK-NEXT: 1 0 0.06 mov wzr, w2
+# CHECK-NEXT: 1 0 0.06 mov w3, w5
 # CHECK-NEXT: 1 1 0.17 movz w2, #0, lsl #16
 # CHECK-NEXT: 1 1 0.17 mov w2, #-1235
 # CHECK-NEXT: 1 1 0.17 mov x2, #5299989643264
-# CHECK-NEXT: 1 1 0.17 mov x2, #0
+# CHECK-NEXT: 1 0 0.06 mov x2, #0
 # CHECK-NEXT: 1 1 0.17 movk w3, #0
 # CHECK-NEXT: 1 1 0.17 movz x4, #0, lsl #16
 # CHECK-NEXT: 1 1 0.17 movk w5, #0, lsl #16
@@ -2585,7 +2585,7 @@
 
 # CHECK: Resource pressure per iteration:
 # CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
-# CHECK-NEXT: 11.00 11.00 33.00 33.00 99.00 165.00 165.00 327.42 182.42 110.42 110.42 92.67 92.67 190.00 146.00 30.00 10.00
+# CHECK-NEXT: 11.00 11.00 33.00 33.00 99.00 165.00 165.00 326.58 181.58 109.58 109.58 91.83 91.83 190.00 146.00 30.00 10.00
 
 # CHECK: Resource pressure by instruction:
 # CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
@@ -3732,14 +3732,14 @@
 # CHECK-NEXT: - - - - - - - 0.50 0.50 - - - - - - - - bics x3, xzr, x3, lsl #1
 # CHECK-NEXT: - - - - - - - 0.50 0.50 - - - - - - - - tst w3, w7, lsl #31
 # CHECK-NEXT: - - - - - - - 0.50 0.50 - - - - - - - - tst x2, x20, asr #2
-# CHECK-NEXT: - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - - mov x3, x6
-# CHECK-NEXT: - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - - mov x3, xzr
-# CHECK-NEXT: - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - - mov wzr, w2
-# CHECK-NEXT: - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - - mov w3, w5
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - mov x3, x6
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - mov x3, xzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - mov wzr, w2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - mov w3, w5
 # CHECK-NEXT: - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - - movz w2, #0, lsl #16
 # CHECK-NEXT: - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - - mov w2, #-1235
 # CHECK-NEXT: - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - - mov x2, #5299989643264
-# CHECK-NEXT: - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - - mov x2, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - mov x2, #0
 # CHECK-NEXT: - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - - movk w3, #0
 # CHECK-NEXT: - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - - movz x4, #0, lsl #16
 # CHECK-NEXT: - - - - - - - 0.17 0.17 0.17 0.17 0.17 0.17 - - - - movk w5, #0, lsl #16
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-zero-lat-movs.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-zero-lat-movs.s
new file mode 100644
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/V2-zero-lat-movs.s
@@ -0,0 +1,75 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=aarch64 -mcpu=neoverse-v2 -instruction-tables < %s | FileCheck %s
+
+mov x1, #0
+mov x1, xzr
+mov w1, #0
+mov w1, wzr
+fmov h1, wzr
+fmov h1, xzr
+fmov s1, wzr
+fmov d1, xzr
+movi d1, #0
+movi v1.2d, #0
+mov w1, w2
+mov x1, x2
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 0 0.06 mov x1, #0
+# CHECK-NEXT: 1 0 0.06 mov x1, xzr
+# CHECK-NEXT: 1 0 0.06 mov w1, #0
+# CHECK-NEXT: 1 0 0.06 mov w1, wzr
+# CHECK-NEXT: 1 0 0.06 fmov h1, wzr
+# CHECK-NEXT: 1 0 0.06 fmov h1, xzr
+# CHECK-NEXT: 1 0 0.06 fmov s1, wzr
+# CHECK-NEXT: 1 0 0.06 fmov d1, xzr
+# CHECK-NEXT: 1 0 0.06 movi d1, #0000000000000000
+# CHECK-NEXT: 1 0 0.06 movi v1.2d, #0000000000000000
+# CHECK-NEXT: 1 0 0.06 mov w1, w2
+# CHECK-NEXT: 1 0 0.06 mov x1, x2
+
+# CHECK: Resources:
+# CHECK-NEXT: [0.0] - V2UnitB
+# CHECK-NEXT: [0.1] - V2UnitB
+# CHECK-NEXT: [1.0] - V2UnitD
+# CHECK-NEXT: [1.1] - V2UnitD
+# CHECK-NEXT: [2] - V2UnitL2
+# CHECK-NEXT: [3.0] - V2UnitL01
+# CHECK-NEXT: [3.1] - V2UnitL01
+# CHECK-NEXT: [4] - V2UnitM0
+# CHECK-NEXT: [5] - V2UnitM1
+# CHECK-NEXT: [6] - V2UnitS0
+# CHECK-NEXT: [7] - V2UnitS1
+# CHECK-NEXT: [8] - V2UnitS2
+# CHECK-NEXT: [9] - V2UnitS3
+# CHECK-NEXT: [10] - V2UnitV0
+# CHECK-NEXT: [11] - V2UnitV1
+# CHECK-NEXT: [12] - V2UnitV2
+# CHECK-NEXT: [13] - V2UnitV3
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13]
+# CHECK-NEXT: - - - - - - - - - - - - - - - - -
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0.0] [0.1] [1.0] [1.1] [2] [3.0] [3.1] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions:
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - mov x1, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - mov x1, xzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - mov w1, #0
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - mov w1, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - fmov h1, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - fmov h1, xzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - fmov s1, wzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - fmov d1, xzr
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - movi d1, #0000000000000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - movi v1.2d, #0000000000000000
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - mov w1, w2
+# CHECK-NEXT: - - - - - - - - - - - - - - - - - mov x1, x2