diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -7916,6 +7916,8 @@
   let Inst{10} = opc{0};
   let Inst{9-5} = Zn;
   let Inst{4-0} = Zd;
+
+  let hasSideEffects = 0;
 }
 
 multiclass sve_int_bin_cons_misc_0_c_fexpa<string asm, SDPatternOperator op> {
diff --git a/llvm/test/CodeGen/AArch64/sched-movprfx.ll b/llvm/test/CodeGen/AArch64/sched-movprfx.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sched-movprfx.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple aarch64 -mcpu=tsv110 -mattr=+sve < %s | FileCheck %s
+
+; Check that the movprfx instruction does not prevent load instructions from
+; being scheduled together. As load instructions have long latency, they are
+; expected to be issued preferentially.
+
+
+; NOTE: The unused parameters ensure z0/z1 are free, avoiding an antidependence during scheduling.
+define <vscale x 2 x i64> @and_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64>* %base) {
+; CHECK-LABEL: and_i64_zero:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
+; CHECK-NEXT: ptrue p1.d
+; CHECK-NEXT: movprfx z0, z2
+; CHECK-NEXT: abs z0.d, p1/m, z2.d
+; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: ret
+  %data0 = tail call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %c, i1 0)
+  %data1 = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64>* %base,
+                                                             i32 1,
+                                                             <vscale x 2 x i1> %pg,
+                                                             <vscale x 2 x i64> undef)
+  %out = add <vscale x 2 x i64> %data0, %data1
+  ret <vscale x 2 x i64> %out
+}
+
+declare <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64>, i1)
+declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64>*, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll
@@ -587,14 +587,14 @@
 ; VBITS_GE_128-NEXT: ptrue p0.s, vl4
 ; VBITS_GE_128-NEXT: ldp q2, q3, [x0]
 ; VBITS_GE_128-NEXT: ldp q4, q5, [x1, #32]
-; VBITS_GE_128-NEXT: ldp q7, q6, [x1]
 ; VBITS_GE_128-NEXT: movprfx z16, z0
 ; VBITS_GE_128-NEXT: sdiv z16.s, p0/m, z16.s, z4.s
 ; VBITS_GE_128-NEXT: mls v0.4s, v16.4s, v4.4s
-; VBITS_GE_128-NEXT: movprfx z4, z3
-; VBITS_GE_128-NEXT: sdiv z4.s, p0/m, z4.s, z6.s
 ; VBITS_GE_128-NEXT: movprfx z16, z1
 ; VBITS_GE_128-NEXT: sdiv z16.s, p0/m, z16.s, z5.s
+; VBITS_GE_128-NEXT: ldp q7, q6, [x1]
+; VBITS_GE_128-NEXT: movprfx z4, z3
+; VBITS_GE_128-NEXT: sdiv z4.s, p0/m, z4.s, z6.s
 ; VBITS_GE_128-NEXT: mls v1.4s, v16.4s, v5.4s
 ; VBITS_GE_128-NEXT: movprfx z5, z2
 ; VBITS_GE_128-NEXT: sdiv z5.s, p0/m, z5.s, z7.s
@@ -1407,14 +1407,14 @@
 ; VBITS_GE_128-NEXT: ptrue p0.s, vl4
 ; VBITS_GE_128-NEXT: ldp q2, q3, [x0]
 ; VBITS_GE_128-NEXT: ldp q4, q5, [x1, #32]
-; VBITS_GE_128-NEXT: ldp q7, q6, [x1]
 ; VBITS_GE_128-NEXT: movprfx z16, z0
 ; VBITS_GE_128-NEXT: udiv z16.s, p0/m, z16.s, z4.s
 ; VBITS_GE_128-NEXT: mls v0.4s, v16.4s, v4.4s
-; VBITS_GE_128-NEXT: movprfx z4, z3
-; VBITS_GE_128-NEXT: udiv z4.s, p0/m, z4.s, z6.s
 ; VBITS_GE_128-NEXT: movprfx z16, z1
 ; VBITS_GE_128-NEXT: udiv z16.s, p0/m, z16.s, z5.s
+; VBITS_GE_128-NEXT: ldp q7, q6, [x1]
+; VBITS_GE_128-NEXT: movprfx z4, z3
+; VBITS_GE_128-NEXT: udiv z4.s, p0/m, z4.s, z6.s
 ; VBITS_GE_128-NEXT: mls v1.4s, v16.4s, v5.4s
 ; VBITS_GE_128-NEXT: movprfx z5, z2
 ; VBITS_GE_128-NEXT: udiv z5.s, p0/m, z5.s, z7.s
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
@@ -578,8 +578,8 @@
 ; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d
 ; CHECK-NEXT: mov z3.s, z2.s[1]
 ; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d
 ; CHECK-NEXT: ldp q0, q1, [x0, #64]
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d
 ; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s
 ; CHECK-NEXT: fmov w10, s5
 ; CHECK-NEXT: mov z5.s, z5.s[1]
@@ -590,18 +590,18 @@
 ; CHECK-NEXT: fmov w8, s4
 ; CHECK-NEXT: strh w9, [sp, #8]
 ; CHECK-NEXT: fmov w9, s6
-; CHECK-NEXT: strh w10, [sp, #4]
 ; CHECK-NEXT: mov z4.s, z4.s[1]
+; CHECK-NEXT: strh w10, [sp, #4]
 ; CHECK-NEXT: strh w8, [sp]
 ; CHECK-NEXT: fmov w8, s3
-; CHECK-NEXT: strh w9, [sp, #14]
 ; CHECK-NEXT: movprfx z3, z7
 ; CHECK-NEXT: fcvtzs z3.d, p0/m, z7.d
-; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
 ; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
+; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
+; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
 ; CHECK-NEXT: strh w8, [sp, #10]
 ; CHECK-NEXT: fmov w8, s3
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT: strh w9, [sp, #14]
 ; CHECK-NEXT: fmov w9, s5
 ; CHECK-NEXT: fmov w10, s4
 ; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
@@ -1331,8 +1331,8 @@
 ; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d
 ; CHECK-NEXT: mov z3.s, z2.s[1]
 ; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d
 ; CHECK-NEXT: ldp q0, q1, [x0, #64]
+; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d
 ; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s
 ; CHECK-NEXT: fmov w10, s5
 ; CHECK-NEXT: mov z5.s, z5.s[1]
@@ -1343,18 +1343,18 @@
 ; CHECK-NEXT: fmov w8, s4
 ; CHECK-NEXT: strh w9, [sp, #8]
 ; CHECK-NEXT: fmov w9, s6
-; CHECK-NEXT: strh w10, [sp, #4]
 ; CHECK-NEXT: mov z4.s, z4.s[1]
+; CHECK-NEXT: strh w10, [sp, #4]
 ; CHECK-NEXT: strh w8, [sp]
 ; CHECK-NEXT: fmov w8, s3
-; CHECK-NEXT: strh w9, [sp, #14]
 ; CHECK-NEXT: movprfx z3, z7
 ; CHECK-NEXT: fcvtzs z3.d, p0/m, z7.d
-; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
 ; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
+; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
+; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
 ; CHECK-NEXT: strh w8, [sp, #10]
 ; CHECK-NEXT: fmov w8, s3
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT: strh w9, [sp, #14]
 ; CHECK-NEXT: fmov w9, s5
 ; CHECK-NEXT: fmov w10, s4
 ; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll
@@ -319,14 +319,14 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldp q1, q0, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: movprfx z2, z0
-; CHECK-NEXT: lsr z2.s, p0/m, z2.s, #24
 ; CHECK-NEXT: movprfx z3, z0
 ; CHECK-NEXT: lsr z3.s, p0/m, z3.s, #8
-; CHECK-NEXT: movprfx z4, z1
-; CHECK-NEXT: lsr z4.s, p0/m, z4.s, #24
 ; CHECK-NEXT: movprfx z5, z1
 ; CHECK-NEXT: lsr z5.s, p0/m, z5.s, #8
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: lsr z2.s, p0/m, z2.s, #24
+; CHECK-NEXT: movprfx z4, z1
+; CHECK-NEXT: lsr z4.s, p0/m, z4.s, #24
 ; CHECK-NEXT: and z3.s, z3.s, #0xff00
 ; CHECK-NEXT: and z5.s, z5.s, #0xff00
 ; CHECK-NEXT: orr z2.d, z3.d, z2.d
@@ -356,10 +356,10 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl1
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: lsr z1.d, p0/m, z1.d, #56
 ; CHECK-NEXT: movprfx z2, z0
 ; CHECK-NEXT: lsr z2.d, p0/m, z2.d, #40
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: lsr z1.d, p0/m, z1.d, #56
 ; CHECK-NEXT: movprfx z3, z0
 ; CHECK-NEXT: lsr z3.d, p0/m, z3.d, #24
 ; CHECK-NEXT: movprfx z4, z0
@@ -396,10 +396,10 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: lsr z1.d, p0/m, z1.d, #56
 ; CHECK-NEXT: movprfx z2, z0
 ; CHECK-NEXT: lsr z2.d, p0/m, z2.d, #40
+; CHECK-NEXT: movprfx z1, z0
+; CHECK-NEXT: lsr z1.d, p0/m, z1.d, #56
 ; CHECK-NEXT: movprfx z3, z0
 ; CHECK-NEXT: lsr z3.d, p0/m, z3.d, #24
 ; CHECK-NEXT: movprfx z4, z0
@@ -436,14 +436,14 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldp q1, q0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: movprfx z2, z0
-; CHECK-NEXT: lsr z2.d, p0/m, z2.d, #56
 ; CHECK-NEXT: movprfx z3, z0
 ; CHECK-NEXT: lsr z3.d, p0/m, z3.d, #40
 ; CHECK-NEXT: movprfx z4, z0
 ; CHECK-NEXT: lsr z4.d, p0/m, z4.d, #24
 ; CHECK-NEXT: movprfx z5, z0
 ; CHECK-NEXT: lsr z5.d, p0/m, z5.d, #8
+; CHECK-NEXT: movprfx z2, z0
+; CHECK-NEXT: lsr z2.d, p0/m, z2.d, #56
 ; CHECK-NEXT: and z3.d, z3.d, #0xff00
 ; CHECK-NEXT: and z4.d, z4.d, #0xff0000
 ; CHECK-NEXT: and z5.d, z5.d, #0xff000000
@@ -451,8 +451,6 @@
 ; CHECK-NEXT: orr z3.d, z5.d, z4.d
 ; CHECK-NEXT: mov z6.d, z0.d
 ; CHECK-NEXT: mov z7.d, z0.d
-; CHECK-NEXT: movprfx z16, z0
-; CHECK-NEXT: lsl z16.d, p0/m, z16.d, #56
 ; CHECK-NEXT: orr z2.d, z3.d, z2.d
 ; CHECK-NEXT: and z6.d, z6.d, #0xff000000
 ; CHECK-NEXT: and z7.d, z7.d, #0xff0000
@@ -463,6 +461,8 @@
 ; CHECK-NEXT: orr z3.d, z4.d, z3.d
 ; CHECK-NEXT: movprfx z4, z1
 ; CHECK-NEXT: lsr z4.d, p0/m, z4.d, #40
+; CHECK-NEXT: movprfx z16, z0
+; CHECK-NEXT: lsl z16.d, p0/m, z16.d, #56
 ; CHECK-NEXT: and z0.d, z0.d, #0xff00
 ; CHECK-NEXT: movprfx z5, z1
 ; CHECK-NEXT: lsr z5.d, p0/m, z5.d, #56
diff --git a/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
--- a/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s
@@ -3928,7 +3928,7 @@
 # CHECK-NEXT: 1 4 1.00 mov z21.s, p15/m, #-32768
 # CHECK-NEXT: 1 4 0.50 mov z31.b, p15/m, z31.b
 # CHECK-NEXT: 1 6 1.00 U mov z31.b, p7/m, b31
-# CHECK-NEXT: 1 1 0.17 U movprfx z31, z6
+# CHECK-NEXT: 1 1 0.17 movprfx z31, z6
 # CHECK-NEXT: 1 8 1.00 mov z31.b, p7/m, wsp
 # CHECK-NEXT: 1 6 1.00 mov z31.b, wsp
 # CHECK-NEXT: 1 4 1.00 mov z31.b, z31.b[63]
diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
--- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
+++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s
@@ -5030,7 +5030,7 @@
 # CHECK-NEXT: 1 2 0.50 mov z21.s, p15/m, #-32768
 # CHECK-NEXT: 1 2 0.50 mov z31.b, p15/m, z31.b
 # CHECK-NEXT: 1 2 0.50 U mov z31.b, p7/m, b31
-# CHECK-NEXT: 1 2 0.50 U movprfx z31, z6
+# CHECK-NEXT: 1 2 0.50 movprfx z31, z6
 # CHECK-NEXT: 2 5 1.00 mov z31.b, p7/m, wsp
 # CHECK-NEXT: 1 3 3.00 mov z31.b, wsp
 # CHECK-NEXT: 1 2 0.50 mov z31.b, z31.b[63]