Index: lib/CodeGen/MachineScheduler.cpp =================================================================== --- lib/CodeGen/MachineScheduler.cpp +++ lib/CodeGen/MachineScheduler.cpp @@ -2774,6 +2774,19 @@ Reason)) { return true; } + + // If one of the candidates neither increases nor decreases pressure, but the + // other increases it, prefer the candidate that leaves pressure unchanged. + if (TryP.getUnitInc() == 0 && CandP.getUnitInc() > 0) { + TryCand.Reason = Reason; + return true; + } + if (TryP.getUnitInc() > 0 && CandP.getUnitInc() == 0) { + if (Cand.Reason > Reason) + Cand.Reason = Reason; + return true; + } + // Do not compare the magnitude of pressure changes between top and bottom // boundary. if (Cand.AtTop != TryCand.AtTop) Index: test/CodeGen/AArch64/vector-fcopysign.ll =================================================================== --- test/CodeGen/AArch64/vector-fcopysign.ll +++ test/CodeGen/AArch64/vector-fcopysign.ll @@ -94,22 +94,22 @@ define <4 x float> @test_copysign_v4f32_v4f64(<4 x float> %a, <4 x double> %b) #0 { ; CHECK-LABEL: test_copysign_v4f32_v4f64: ; CHECK-NEXT: mov s3, v0[1] -; CHECK-NEXT: movi.4s v4, #128, lsl #24 -; CHECK-NEXT: fcvt s5, d1 -; CHECK-NEXT: mov s6, v0[2] -; CHECK-NEXT: mov s7, v0[3] -; CHECK-NEXT: bit.16b v0, v5, v4 -; CHECK-NEXT: fcvt s5, d2 -; CHECK-NEXT: bit.16b v6, v5, v4 -; CHECK-NEXT: mov d1, v1[1] +; CHECK-NEXT: mov d4, v1[1] +; CHECK-NEXT: fcvt s4, d4 +; CHECK-NEXT: movi.4s v5, #128, lsl #24 ; CHECK-NEXT: fcvt s1, d1 -; CHECK-NEXT: bit.16b v3, v1, v4 +; CHECK-NEXT: bit.16b v3, v4, v5 +; CHECK-NEXT: mov s4, v0[2] +; CHECK-NEXT: mov s6, v0[3] +; CHECK-NEXT: bit.16b v0, v1, v5 +; CHECK-NEXT: fcvt s1, d2 +; CHECK-NEXT: bit.16b v4, v1, v5 ; CHECK-NEXT: mov d1, v2[1] ; CHECK-NEXT: fcvt s1, d1 ; CHECK-NEXT: ins.s v0[1], v3[0] -; CHECK-NEXT: ins.s v0[2], v6[0] -; CHECK-NEXT: bit.16b v7, v1, v4 -; CHECK-NEXT: ins.s v0[3], v7[0] +; CHECK-NEXT: ins.s v0[2], v4[0] +; CHECK-NEXT: bit.16b v6, v1, v5 +; CHECK-NEXT: ins.s v0[3], v6[0] ; 
CHECK-NEXT: ret %tmp0 = fptrunc <4 x double> %b to <4 x float> %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %tmp0)