Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td =================================================================== --- llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -3939,7 +3939,7 @@ (outs GPR64sp:$wback, regtype:$Rt), (ins GPR64sp:$Rn, simm9:$offset), asm, "$Rn = $wback,@earlyclobber $wback", []>, - Sched<[WriteLD, WriteAdr]>; + Sched<[WriteAdr, WriteLD]>; let mayStore = 1, mayLoad = 0 in class StorePreIdx sz, bit V, bits<2> opc, RegisterOperand regtype, @@ -3950,7 +3950,7 @@ asm, "$Rn = $wback,@earlyclobber $wback", [(set GPR64sp:$wback, (storeop (Ty regtype:$Rt), GPR64sp:$Rn, simm9:$offset))]>, - Sched<[WriteAdr, WriteST]>; + Sched<[WriteST, WriteAdr]>; } // hasSideEffects = 0 //--- @@ -3985,7 +3985,7 @@ (outs GPR64sp:$wback, regtype:$Rt), (ins GPR64sp:$Rn, simm9:$offset), asm, "$Rn = $wback,@earlyclobber $wback", []>, - Sched<[WriteLD, WriteAdr]>; + Sched<[WriteAdr, WriteLD]>; let mayStore = 1, mayLoad = 0 in class StorePostIdx sz, bit V, bits<2> opc, RegisterOperand regtype, @@ -3996,7 +3996,7 @@ asm, "$Rn = $wback,@earlyclobber $wback", [(set GPR64sp:$wback, (storeop (Ty regtype:$Rt), GPR64sp:$Rn, simm9:$offset))]>, - Sched<[WriteAdr, WriteST]>; + Sched<[WriteST, WriteAdr]>; } // hasSideEffects = 0 @@ -4082,7 +4082,7 @@ : BaseLoadStorePairPreIdx, - Sched<[WriteLD, WriteLDHi, WriteAdr]>; + Sched<[WriteAdr, WriteLD, WriteLDHi]>; let mayStore = 1, mayLoad = 0 in class StorePairPreIdx opc, bit V, RegisterOperand regtype, @@ -4091,7 +4091,7 @@ (ins regtype:$Rt, regtype:$Rt2, GPR64sp:$Rn, indextype:$offset), asm>, - Sched<[WriteAdr, WriteSTP]>; + Sched<[WriteSTP, WriteAdr]>; } // hasSideEffects = 0 // (post-indexed) @@ -4123,7 +4123,7 @@ : BaseLoadStorePairPostIdx, - Sched<[WriteLD, WriteLDHi, WriteAdr]>; + Sched<[WriteAdr, WriteLD, WriteLDHi]>; let mayStore = 1, mayLoad = 0 in class StorePairPostIdx opc, bit V, RegisterOperand regtype, @@ -4132,7 +4132,7 @@ (ins regtype:$Rt, regtype:$Rt2, GPR64sp:$Rn, idxtype:$offset), asm>, - Sched<[WriteAdr, WriteSTP]>; + Sched<[WriteSTP, WriteAdr]>; } // hasSideEffects = 0 // (no-allocate) Index: llvm/test/CodeGen/AArch64/merge-store-dependency.ll =================================================================== --- llvm/test/CodeGen/AArch64/merge-store-dependency.ll +++ llvm/test/CodeGen/AArch64/merge-store-dependency.ll @@ -14,19 +14,20 @@ ; A53-NEXT: .cfi_def_cfa_offset 16 ; A53-NEXT: .cfi_offset w19, -8 ; A53-NEXT: .cfi_offset w30, -16 -; A53-NEXT: movi v0.2d, #0000000000000000 -; A53-NEXT: mov x8, x0 -; A53-NEXT: mov x19, x8 -; A53-NEXT: mov w0, w1 -; A53-NEXT: mov w9, #256 -; A53-NEXT: str q0, [x19, #16]! -; A53-NEXT: str w1, [x19] -; A53-NEXT: mov w1, #4 -; A53-NEXT: stp x2, x3, [x8, #32] -; A53-NEXT: mov x2, x8 -; A53-NEXT: str q0, [x8] -; A53-NEXT: strh w9, [x8, #24] -; A53-NEXT: str wzr, [x8, #20] + +; A53-NEXT: movi v0.2d, #0000000000000000 +; A53-NEXT: mov x8, x0 +; A53-NEXT: mov x19, x8 +; A53-NEXT: mov w0, w1 +; A53-NEXT: mov w9, #256 +; A53-NEXT: str q0, [x19, #16]! +; A53-NEXT: str q0, [x8] +; A53-NEXT: strh w9, [x8, #24] +; A53-NEXT: str wzr, [x8, #20] +; A53-NEXT: str w1, [x19] +; A53-NEXT: mov w1, #4 +; A53-NEXT: stp x2, x3, [x8, #32] +; A53-NEXT: mov x2, x8 ; A53-NEXT: bl fcntl ; A53-NEXT: adrp x9, gv0 ; A53-NEXT: add x9, x9, :lo12:gv0 Index: llvm/test/tools/llvm-mca/AArch64/Exynos/load.s =================================================================== --- llvm/test/tools/llvm-mca/AArch64/Exynos/load.s +++ llvm/test/tools/llvm-mca/AArch64/Exynos/load.s @@ -20,7 +20,7 @@ # ALL: Iterations: 100 # ALL-NEXT: Instructions: 1200 -# ALL-NEXT: Total Cycles: 1904 +# ALL-NEXT: Total Cycles: 1304 # M3-NEXT: Total uOps: 1600 # M4-NEXT: Total uOps: 1400 @@ -28,11 +28,11 @@ # ALL: Dispatch Width: 6 -# M3-NEXT: uOps Per Cycle: 0.84 -# M4-NEXT: uOps Per Cycle: 0.74 -# M5-NEXT: uOps Per Cycle: 0.74 +# M3-NEXT: uOps Per Cycle: 1.23 +# M4-NEXT: uOps Per Cycle: 1.07 +# M5-NEXT: uOps Per Cycle: 1.07 -# ALL-NEXT: IPC: 0.63 +# ALL-NEXT: IPC: 0.92 # ALL-NEXT: Block RThroughput: 6.0 # ALL: Instruction Info: