Index: llvm/lib/Target/AArch64/SVEInstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -7916,6 +7916,8 @@
   let Inst{10}  = opc{0};
   let Inst{9-5} = Zn;
   let Inst{4-0} = Zd;
+
+  let hasSideEffects = 0;
 }

 multiclass sve_int_bin_cons_misc_0_c_fexpa {
Index: llvm/test/CodeGen/AArch64/sched-movprfx.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AArch64/sched-movprfx.ll
@@ -0,0 +1,29 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple aarch64 -mcpu=tsv110 -mattr=+sve < %s | FileCheck %s
+
+; Check that the movprfx instruction does not prevent load instructions from
+; being scheduled together. As load instructions have long latency, they are
+; expected to be issued preferentially.
+
+
+; NOTE: The unused parameters ensure z0/z1 are free, avoiding anti-dependences in the schedule.
+define <vscale x 2 x i64> @and_i64_zero(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c, <vscale x 2 x i64>* %base) {
+; CHECK-LABEL: and_i64_zero:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0]
+; CHECK-NEXT:    ptrue p1.d
+; CHECK-NEXT:    movprfx z0, z2
+; CHECK-NEXT:    abs z0.d, p1/m, z2.d
+; CHECK-NEXT:    add z0.d, z0.d, z1.d
+; CHECK-NEXT:    ret
+  %data0 = tail call <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64> %c, i1 0)
+  %data1 = call <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64>* %base,
+                                                             i32 1,
+                                                             <vscale x 2 x i1> %pg,
+                                                             <vscale x 2 x i64> undef)
+  %out = add <vscale x 2 x i64> %data0, %data1
+  ret <vscale x 2 x i64> %out
+}
+
+declare <vscale x 2 x i64> @llvm.abs.nxv2i64(<vscale x 2 x i64>, i1)
+declare <vscale x 2 x i64> @llvm.masked.load.nxv2i64(<vscale x 2 x i64>*, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)