diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -3802,7 +3802,7 @@ // Floating point immediate move. //===----------------------------------------------------------------------===// -let isReMaterializable = 1 in { +let isReMaterializable = 1, isAsCheapAsAMove = 1 in { defm FMOV : FPMoveImmediate<"fmov">; } diff --git a/llvm/test/CodeGen/AArch64/arm64-aapcs.ll b/llvm/test/CodeGen/AArch64/arm64-aapcs.ll --- a/llvm/test/CodeGen/AArch64/arm64-aapcs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-aapcs.ll @@ -90,8 +90,8 @@ ; others. The extra arguments should go in registers rather than on the stack. define void @test_variadic() { call void(i32, ...) @variadic(i32 0, i64 1, double 2.0) -; CHECK: fmov d0, #2.0 ; CHECK: mov w1, #1 +; CHECK: fmov d0, #2.0 ; CHECK: bl variadic ret void } diff --git a/llvm/test/CodeGen/AArch64/fmov-imm-licm.ll b/llvm/test/CodeGen/AArch64/fmov-imm-licm.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/fmov-imm-licm.ll @@ -0,0 +1,33 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -o - %s | FileCheck %s + +; The purpose of this test is to check that an FMOV instruction that +; only materializes an immediate is not MachineLICM'd out of a loop. +; We check this in two ways: by looking for the FMOV inside the loop, +; and also by checking that we're not spilling any FP callee-saved +; registers (neither an stp pair nor a single str of a d-register). 
+ +%struct.Node = type { %struct.Node*, i8* } + +define void @process_nodes(%struct.Node* %0) { +; CHECK-LABEL: process_nodes: +; CHECK-NOT: {{stp|str}} {{d[0-9]+}} +; CHECK-LABEL: .LBB0_2: +; CHECK: fmov s0, #1.00000000 +; CHECK: bl do_it +entry: + %1 = icmp eq %struct.Node* %0, null + br i1 %1, label %exit, label %loop + +loop: + %2 = phi %struct.Node* [ %4, %loop ], [ %0, %entry ] + tail call void @do_it(float 1.000000e+00, %struct.Node* nonnull %2) + %3 = getelementptr inbounds %struct.Node, %struct.Node* %2, i64 0, i32 0 + %4 = load %struct.Node*, %struct.Node** %3, align 8 + %5 = icmp eq %struct.Node* %4, null + br i1 %5, label %exit, label %loop + +exit: + ret void +} + +declare void @do_it(float, %struct.Node*) diff --git a/llvm/test/CodeGen/AArch64/fp-cond-sel.ll b/llvm/test/CodeGen/AArch64/fp-cond-sel.ll --- a/llvm/test/CodeGen/AArch64/fp-cond-sel.ll +++ b/llvm/test/CodeGen/AArch64/fp-cond-sel.ll @@ -20,8 +20,8 @@ %tst2 = icmp sle i64 %lhs64, %rhs64 %val2 = select i1 %tst2, double 1.0, double 0.0 store double %val2, double* @vardouble -; FLT0 is reused from above on ARM64. -; CHECK: fmov d[[FLT1:[0-9]+]], #1.0 +; CHECK-DAG: fmov d[[FLT0:[0-9]+]], xzr +; CHECK-DAG: fmov d[[FLT1:[0-9]+]], #1.0 ; CHECK: fcsel {{d[0-9]+}}, d[[FLT1]], d[[FLT0]], le call void @use_float(float 0.0) diff --git a/llvm/test/CodeGen/AArch64/func-calls.ll b/llvm/test/CodeGen/AArch64/func-calls.ll --- a/llvm/test/CodeGen/AArch64/func-calls.ll +++ b/llvm/test/CodeGen/AArch64/func-calls.ll @@ -90,12 +90,10 @@ ; memcpy gets created, but the following works for now. 
; CHECK-DAG: str {{q[0-9]+}}, [sp] -; CHECK-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0 -; CHECK: mov v0.16b, v[[FINAL_DOUBLE]].16b +; CHECK-DAG: fmov d0, #1.0 ; CHECK-NONEON-DAG: str {{q[0-9]+}}, [sp] -; CHECK-NONEON-DAG: fmov d[[FINAL_DOUBLE:[0-9]+]], #1.0 -; CHECK-NONEON: fmov d0, d[[FINAL_DOUBLE]] +; CHECK-NONEON-DAG: fmov d0, #1.0 ; CHECK: bl struct_on_stack ; CHECK-NOFP-NOT: fmov diff --git a/llvm/test/CodeGen/AArch64/pow.ll b/llvm/test/CodeGen/AArch64/pow.ll --- a/llvm/test/CodeGen/AArch64/pow.ll +++ b/llvm/test/CodeGen/AArch64/pow.ll @@ -69,16 +69,14 @@ ; CHECK-LABEL: pow_v4f32_one_fourth_not_enough_fmf: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 // =48 -; CHECK-NEXT: str d8, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: fmov s8, #0.25000000 ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: mov v1.16b, v8.16b -; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-NEXT: fmov s1, #0.25000000 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl powf ; CHECK-NEXT: str d0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: fmov s1, #0.25000000 ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov v1.16b, v8.16b ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: bl powf ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload @@ -86,7 +84,7 @@ ; CHECK-NEXT: mov v0.s[1], v1.s[0] ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov v1.16b, v8.16b +; CHECK-NEXT: fmov s1, #0.25000000 ; CHECK-NEXT: mov s0, v0.s[2] ; CHECK-NEXT: bl powf ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload @@ -94,12 +92,11 @@ ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill -; CHECK-NEXT: mov v1.16b, v8.16b +; CHECK-NEXT: fmov s1, #0.25000000 ; CHECK-NEXT: mov s0, v0.s[3] ; CHECK-NEXT: bl powf ; CHECK-NEXT: ldr 
q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload -; CHECK-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v1.s[3], v0.s[0] ; CHECK-NEXT: mov v0.16b, v1.16b @@ -113,21 +110,18 @@ ; CHECK-LABEL: pow_v2f64_one_fourth_not_enough_fmf: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 // =48 -; CHECK-NEXT: str d8, [sp, #32] // 8-byte Folded Spill -; CHECK-NEXT: fmov d8, #0.25000000 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: mov d0, v0.d[1] -; CHECK-NEXT: mov v1.16b, v8.16b -; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill +; CHECK-NEXT: fmov d1, #0.25000000 +; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl pow ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: fmov d1, #0.25000000 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov v1.16b, v8.16b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: bl pow ; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload -; CHECK-NEXT: ldr d8, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: add sp, sp, #48 // =48 diff --git a/llvm/test/CodeGen/AArch64/swifterror.ll b/llvm/test/CodeGen/AArch64/swifterror.ll --- a/llvm/test/CodeGen/AArch64/swifterror.ll +++ b/llvm/test/CodeGen/AArch64/swifterror.ll @@ -339,14 +339,14 @@ ; CHECK-APPLE: malloc ; First vararg -; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP:x[0-9]+]], #16] ; CHECK-APPLE-AARCH64: mov [[ID:w[0-9]+]], #1 +; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP:x[0-9]+]], #16] ; CHECK-APPLE-AARCH64: add [[ARGS:x[0-9]+]], [[TMP]], #16 +; Third vararg +; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], 
#32] ; CHECK-APPLE-AARCH64: strb [[ID]], [x0, #8] ; Second vararg ; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #24] -; Third vararg -; CHECK-APPLE-AARCH64: ldr {{w[0-9]+}}, [{{.*}}[[TMP]], #32] ; CHECK-APPLE-ARM64_32: mov [[ID:w[0-9]+]], #1 ; CHECK-APPLE-ARM64_32: add [[ARGS:x[0-9]+]], [[TMP:x[0-9]+]], #16