diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td --- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -1178,6 +1178,13 @@ return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); }]>; +def fpimm32SIMDModImmType4XForm : SDNodeXFormgetValueAPF() + .bitcastToAPInt() + .getZExtValue()); + return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i32); + }]>; + def fpimm64XForm : SDNodeXFormgetValueAPF(); uint32_t enc = AArch64_AM::getFP64Imm(InVal); @@ -1199,6 +1206,13 @@ let ParserMatchClass = FPImmOperand; let PrintMethod = "printFPImmOperand"; } + +def fpimm32SIMDModImmType4 : FPImmLeaf { +} + def fpimm64 : Operand, FPImmLeaf; +let Predicates = [HasNEON] in { + // Using the MOVI to materialize fp constants. + def : Pat<(f32 fpimm32SIMDModImmType4:$in), + (EXTRACT_SUBREG (MOVIv2i32 (fpimm32SIMDModImmType4XForm f32:$in), + (i32 24)), + ssub)>; +} + def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; diff --git a/llvm/test/CodeGen/AArch64/fabs.ll b/llvm/test/CodeGen/AArch64/fabs.ll --- a/llvm/test/CodeGen/AArch64/fabs.ll +++ b/llvm/test/CodeGen/AArch64/fabs.ll @@ -22,9 +22,8 @@ define float @still_not_fabs(float %x) #0 { ; CHECK-LABEL: still_not_fabs: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-2147483648 +; CHECK-NEXT: movi v1.2s, #128, lsl #24 ; CHECK-NEXT: fneg s2, s0 -; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: fcmp s0, s1 ; CHECK-NEXT: fcsel s0, s0, s2, ge ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll --- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll +++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll @@ -87,9 +87,8 @@ define i32 @fcvtzs_f16_i32_7(half %flt) { ; CHECK-NO16-LABEL: fcvtzs_f16_i32_7: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: mov w8, #1124073472 +; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 ; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmov s1, w8 ; CHECK-NO16-NEXT: fmul s0, s0, s1 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: fcvt s0, h0 @@ -108,9 +107,8 @@ define i32 @fcvtzs_f16_i32_15(half %flt) { ; CHECK-NO16-LABEL: fcvtzs_f16_i32_15: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: mov w8, #1191182336 +; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 ; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmov s1, w8 ; CHECK-NO16-NEXT: fmul s0, s0, s1 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: fcvt s0, h0 @@ -129,9 +127,8 @@ define i64 @fcvtzs_f16_i64_7(half %flt) { ; CHECK-NO16-LABEL: fcvtzs_f16_i64_7: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: mov w8, #1124073472 +; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 ; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmov s1, w8 ; CHECK-NO16-NEXT: fmul s0, s0, s1 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: fcvt s0, h0 @@ -150,9 +147,8 @@ define i64 @fcvtzs_f16_i64_15(half %flt) { ; CHECK-NO16-LABEL: fcvtzs_f16_i64_15: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: mov w8, #1191182336 +; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 ; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmov s1, w8 ; CHECK-NO16-NEXT: fmul s0, s0, s1 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: fcvt s0, h0 @@ -253,9 +249,8 @@ define i32 @fcvtzu_f16_i32_7(half %flt) { ; CHECK-NO16-LABEL: fcvtzu_f16_i32_7: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: mov w8, #1124073472 +; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 ; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmov s1, w8 ; CHECK-NO16-NEXT: fmul s0, s0, s1 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: fcvt s0, h0 @@ -274,9 +269,8 @@ define i32 @fcvtzu_f16_i32_15(half %flt) { ; CHECK-NO16-LABEL: fcvtzu_f16_i32_15: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: mov w8, #1191182336 +; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 ; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmov s1, w8 ; CHECK-NO16-NEXT: fmul s0, s0, s1 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: fcvt s0, h0 @@ -295,9 +289,8 @@ define i64 @fcvtzu_f16_i64_7(half %flt) { ; CHECK-NO16-LABEL: fcvtzu_f16_i64_7: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: mov w8, #1124073472 +; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 ; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmov s1, w8 ; CHECK-NO16-NEXT: fmul s0, s0, s1 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: fcvt s0, h0 @@ -316,9 +309,8 @@ define i64 @fcvtzu_f16_i64_15(half %flt) { ; CHECK-NO16-LABEL: fcvtzu_f16_i64_15: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: mov w8, #1191182336 +; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 ; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmov s1, w8 ; CHECK-NO16-NEXT: fmul s0, s0, s1 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: fcvt s0, h0 @@ -419,12 +411,11 @@ define half @scvtf_f16_i32_7(i32 %int) { ; CHECK-NO16-LABEL: scvtf_f16_i32_7: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: scvtf s0, w0 -; CHECK-NO16-NEXT: mov w8, #1124073472 -; CHECK-NO16-NEXT: fmov s1, w8 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-NO16-NEXT: scvtf s1, w0 +; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24 +; CHECK-NO16-NEXT: fcvt h1, s1 +; CHECK-NO16-NEXT: fcvt s1, h1 +; CHECK-NO16-NEXT: fdiv s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; @@ -440,12 +431,11 @@ define half @scvtf_f16_i32_15(i32 %int) { ; CHECK-NO16-LABEL: scvtf_f16_i32_15: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: scvtf s0, w0 -; CHECK-NO16-NEXT: mov w8, #1191182336 -; CHECK-NO16-NEXT: fmov s1, w8 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-NO16-NEXT: scvtf s1, w0 +; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24 +; CHECK-NO16-NEXT: fcvt h1, s1 +; CHECK-NO16-NEXT: fcvt s1, h1 +; CHECK-NO16-NEXT: fdiv s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; @@ -461,12 +451,11 @@ define half @scvtf_f16_i64_7(i64 %long) { ; CHECK-NO16-LABEL: scvtf_f16_i64_7: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: scvtf s0, x0 -; CHECK-NO16-NEXT: mov w8, #1124073472 -; CHECK-NO16-NEXT: fmov s1, w8 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-NO16-NEXT: scvtf s1, x0 +; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24 +; CHECK-NO16-NEXT: fcvt h1, s1 +; CHECK-NO16-NEXT: fcvt s1, h1 +; CHECK-NO16-NEXT: fdiv s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; @@ -482,12 +471,11 @@ define half @scvtf_f16_i64_15(i64 %long) { ; CHECK-NO16-LABEL: scvtf_f16_i64_15: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: scvtf s0, x0 -; CHECK-NO16-NEXT: mov w8, #1191182336 -; CHECK-NO16-NEXT: fmov s1, w8 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-NO16-NEXT: scvtf s1, x0 +; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24 +; CHECK-NO16-NEXT: fcvt h1, s1 +; CHECK-NO16-NEXT: fcvt s1, h1 +; CHECK-NO16-NEXT: fdiv s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; @@ -585,12 +573,11 @@ define half @ucvtf_f16_i32_7(i32 %int) { ; CHECK-NO16-LABEL: ucvtf_f16_i32_7: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: ucvtf s0, w0 -; CHECK-NO16-NEXT: mov w8, #1124073472 -; CHECK-NO16-NEXT: fmov s1, w8 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-NO16-NEXT: ucvtf s1, w0 +; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24 +; CHECK-NO16-NEXT: fcvt h1, s1 +; CHECK-NO16-NEXT: fcvt s1, h1 +; CHECK-NO16-NEXT: fdiv s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; @@ -606,12 +593,11 @@ define half @ucvtf_f16_i32_15(i32 %int) { ; CHECK-NO16-LABEL: ucvtf_f16_i32_15: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: ucvtf s0, w0 -; CHECK-NO16-NEXT: mov w8, #1191182336 -; CHECK-NO16-NEXT: fmov s1, w8 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-NO16-NEXT: ucvtf s1, w0 +; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24 +; CHECK-NO16-NEXT: fcvt h1, s1 +; CHECK-NO16-NEXT: fcvt s1, h1 +; CHECK-NO16-NEXT: fdiv s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; @@ -627,12 +613,11 @@ define half @ucvtf_f16_i64_7(i64 %long) { ; CHECK-NO16-LABEL: ucvtf_f16_i64_7: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: ucvtf s0, x0 -; CHECK-NO16-NEXT: mov w8, #1124073472 -; CHECK-NO16-NEXT: fmov s1, w8 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-NO16-NEXT: ucvtf s1, x0 +; CHECK-NO16-NEXT: movi v0.2s, #67, lsl #24 +; CHECK-NO16-NEXT: fcvt h1, s1 +; CHECK-NO16-NEXT: fcvt s1, h1 +; CHECK-NO16-NEXT: fdiv s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; @@ -648,12 +633,11 @@ define half @ucvtf_f16_i64_15(i64 %long) { ; CHECK-NO16-LABEL: ucvtf_f16_i64_15: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: ucvtf s0, x0 -; CHECK-NO16-NEXT: mov w8, #1191182336 -; CHECK-NO16-NEXT: fmov s1, w8 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fdiv s0, s0, s1 +; CHECK-NO16-NEXT: ucvtf s1, x0 +; CHECK-NO16-NEXT: movi v0.2s, #71, lsl #24 +; CHECK-NO16-NEXT: fcvt h1, s1 +; CHECK-NO16-NEXT: fcvt s1, h1 +; CHECK-NO16-NEXT: fdiv s0, s1, s0 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: ret ; @@ -749,9 +733,8 @@ define i32 @fcvtzs_sat_f16_i32_7(half %dbl) { ; CHECK-NO16-LABEL: fcvtzs_sat_f16_i32_7: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: mov w8, #1124073472 +; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 ; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmov s1, w8 ; CHECK-NO16-NEXT: fmul s0, s0, s1 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: fcvt s0, h0 @@ -770,9 +753,8 @@ define i32 @fcvtzs_sat_f16_i32_15(half %dbl) { ; CHECK-NO16-LABEL: fcvtzs_sat_f16_i32_15: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: mov w8, #1191182336 +; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 ; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmov s1, w8 ; CHECK-NO16-NEXT: fmul s0, s0, s1 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: fcvt s0, h0 @@ -791,9 +773,8 @@ define i64 @fcvtzs_sat_f16_i64_7(half %dbl) { ; CHECK-NO16-LABEL: fcvtzs_sat_f16_i64_7: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: mov w8, #1124073472 +; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 ; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmov s1, w8 ; CHECK-NO16-NEXT: fmul s0, s0, s1 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: fcvt s0, h0 @@ -812,9 +793,8 @@ define i64 @fcvtzs_sat_f16_i64_15(half %dbl) { ; CHECK-NO16-LABEL: fcvtzs_sat_f16_i64_15: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: mov w8, #1191182336 +; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 ; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmov s1, w8 ; CHECK-NO16-NEXT: fmul s0, s0, s1 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: fcvt s0, h0 @@ -912,9 +892,8 @@ define i32 @fcvtzu_sat_f16_i32_7(half %dbl) { ; CHECK-NO16-LABEL: fcvtzu_sat_f16_i32_7: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: mov w8, #1124073472 +; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 ; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmov s1, w8 ; CHECK-NO16-NEXT: fmul s0, s0, s1 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: fcvt s0, h0 @@ -933,9 +912,8 @@ define i32 @fcvtzu_sat_f16_i32_15(half %dbl) { ; CHECK-NO16-LABEL: fcvtzu_sat_f16_i32_15: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: mov w8, #1191182336 +; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 ; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmov s1, w8 ; CHECK-NO16-NEXT: fmul s0, s0, s1 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: fcvt s0, h0 @@ -954,9 +932,8 @@ define i64 @fcvtzu_sat_f16_i64_7(half %dbl) { ; CHECK-NO16-LABEL: fcvtzu_sat_f16_i64_7: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: mov w8, #1124073472 +; CHECK-NO16-NEXT: movi v1.2s, #67, lsl #24 ; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmov s1, w8 ; CHECK-NO16-NEXT: fmul s0, s0, s1 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: fcvt s0, h0 @@ -975,9 +952,8 @@ define i64 @fcvtzu_sat_f16_i64_15(half %dbl) { ; CHECK-NO16-LABEL: fcvtzu_sat_f16_i64_15: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: mov w8, #1191182336 +; CHECK-NO16-NEXT: movi v1.2s, #71, lsl #24 ; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmov s1, w8 ; CHECK-NO16-NEXT: fmul s0, s0, s1 ; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: fcvt s0, h0 diff --git a/llvm/test/CodeGen/AArch64/fpimm.ll b/llvm/test/CodeGen/AArch64/fpimm.ll --- a/llvm/test/CodeGen/AArch64/fpimm.ll +++ b/llvm/test/CodeGen/AArch64/fpimm.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -verify-machineinstrs < %s | FileCheck %s -; RUN: llc -mtriple=aarch64-apple-darwin -code-model=large -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,LARGE +; RUN: llc -mtriple=aarch64-apple-darwin -code-model=large -verify-machineinstrs < %s | FileCheck %s --check-prefixes=LARGE ; RUN: llc -mtriple=aarch64-none-eabi -code-model=tiny -verify-machineinstrs < %s | FileCheck %s @varf32 = global float 0.0 @@ -15,8 +15,7 @@ %newval2 = fadd float %val, 128.0 store volatile float %newval2, float* @varf32 -; CHECK-DAG: mov [[W128:w[0-9]+]], #1124073472 -; CHECK-DAG: fmov {{s[0-9]+}}, [[W128]] +; CHECK-DAG: movi [[REG:v[0-9s]+]].2s, #67, lsl #24 ; CHECK: ret ret void diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll --- a/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-scalar.ll @@ -131,11 +131,10 @@ ; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: fmov s8, s0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov w8, #-251658240 +; CHECK-NEXT: movi v0.2s, #241, lsl #24 +; CHECK-NEXT: mov w8, #1895825407 ; CHECK-NEXT: mov x10, #34359738367 ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov w8, #1895825407 ; CHECK-NEXT: fcmp s8, s0 ; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: mov x8, #-34359738368 @@ -160,11 +159,10 @@ ; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: fmov s8, s0 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov w8, #-16777216 +; CHECK-NEXT: movi v0.2s, #255, lsl #24 +; CHECK-NEXT: mov w8, #2130706431 ; CHECK-NEXT: mov x10, #9223372036854775807 ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov w8, #2130706431 ; CHECK-NEXT: fcmp s8, s0 ; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: mov x8, #-9223372036854775808 @@ -575,11 +573,10 @@ ; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov w8, #-251658240 +; CHECK-NEXT: movi v0.2s, #241, lsl #24 +; CHECK-NEXT: mov w8, #1895825407 ; CHECK-NEXT: mov x10, #34359738367 ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov w8, #1895825407 ; CHECK-NEXT: fcmp s8, s0 ; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: mov x8, #-34359738368 @@ -605,11 +602,10 @@ ; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov w8, #-16777216 +; CHECK-NEXT: movi v0.2s, #255, lsl #24 +; CHECK-NEXT: mov w8, #2130706431 ; CHECK-NEXT: mov x10, #9223372036854775807 ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov w8, #2130706431 ; CHECK-NEXT: fcmp s8, s0 ; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: mov x8, #-9223372036854775808 diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -827,15 +827,14 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov w8, #-251658240 +; CHECK-NEXT: movi v9.2s, #241, lsl #24 +; CHECK-NEXT: mov w8, #1895825407 ; CHECK-NEXT: mov x21, #-34359738368 ; CHECK-NEXT: mov x22, #34359738367 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: fmov s10, w8 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: mov w8, #1895825407 ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: fmov s10, w8 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x21, x1, lt ; CHECK-NEXT: fcmp s8, s10 @@ -894,15 +893,14 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov w8, #-16777216 +; CHECK-NEXT: movi v9.2s, #255, lsl #24 +; CHECK-NEXT: mov w8, #2130706431 ; CHECK-NEXT: mov x21, #-9223372036854775808 ; CHECK-NEXT: mov x22, #9223372036854775807 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: fmov s10, w8 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: mov w8, #2130706431 ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: fmov s10, w8 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x21, x1, lt ; CHECK-NEXT: fcmp s8, s10 @@ -1106,20 +1104,19 @@ ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov w8, #-251658240 +; CHECK-NEXT: movi v9.2s, #241, lsl #24 +; CHECK-NEXT: mov w8, #1895825407 ; CHECK-NEXT: mov x25, #-34359738368 ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: mov x26, #34359738367 -; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: mov w8, #1895825407 -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fmov s10, w8 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x25, x1, lt ; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: csel x9, x26, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 @@ -1211,20 +1208,19 @@ ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov w8, #-16777216 +; CHECK-NEXT: movi v9.2s, #255, lsl #24 +; CHECK-NEXT: mov w8, #2130706431 ; CHECK-NEXT: mov x25, #-9223372036854775808 ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: mov x26, #9223372036854775807 -; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: mov w8, #2130706431 -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fmov s10, w8 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x25, x1, lt ; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: csel x9, x26, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 @@ -1862,15 +1858,14 @@ ; CHECK-NEXT: fcvt s8, h1 ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov w8, #-251658240 +; CHECK-NEXT: movi v9.2s, #241, lsl #24 +; CHECK-NEXT: mov w8, #1895825407 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov x25, #-34359738368 ; CHECK-NEXT: mov x26, #34359738367 -; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: mov w8, #1895825407 -; CHECK-NEXT: mov h0, v0.h[2] -; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fmov s10, w8 +; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x25, x1, lt ; CHECK-NEXT: fcmp s8, s10 @@ -1970,15 +1965,14 @@ ; CHECK-NEXT: fcvt s8, h1 ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov w8, #-16777216 +; CHECK-NEXT: movi v9.2s, #255, lsl #24 +; CHECK-NEXT: mov w8, #2130706431 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov x25, #-9223372036854775808 ; CHECK-NEXT: mov x26, #9223372036854775807 -; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: mov w8, #2130706431 -; CHECK-NEXT: mov h0, v0.h[2] -; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fmov s10, w8 +; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x25, x1, lt ; CHECK-NEXT: fcmp s8, s10 @@ -2618,15 +2612,14 @@ ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov w8, #-251658240 +; CHECK-NEXT: movi v10.2s, #241, lsl #24 +; CHECK-NEXT: mov w8, #1895825407 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: mov x25, #-34359738368 ; CHECK-NEXT: mov x23, #34359738367 -; CHECK-NEXT: fmov s10, w8 -; CHECK-NEXT: mov w8, #1895825407 -; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: fmov s9, w8 +; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x25, x1, lt ; CHECK-NEXT: fcmp s8, s9 @@ -2827,15 +2820,14 @@ ; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov w8, #-16777216 +; CHECK-NEXT: movi v10.2s, #255, lsl #24 +; CHECK-NEXT: mov w8, #2130706431 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: mov x21, #-9223372036854775808 ; CHECK-NEXT: mov x22, #9223372036854775807 -; CHECK-NEXT: fmov s10, w8 -; CHECK-NEXT: mov w8, #2130706431 -; CHECK-NEXT: mov h0, v0.h[1] -; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: fmov s9, w8 +; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x21, x1, lt ; CHECK-NEXT: fcmp s8, s9 diff --git a/llvm/test/CodeGen/AArch64/remat-const-float-simd.ll b/llvm/test/CodeGen/AArch64/remat-const-float-simd.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/remat-const-float-simd.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -mattr=+neon | FileCheck %s --check-prefixes=CHECK,CHECK-NEON +; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -verify-machineinstrs -mattr=-neon | FileCheck %s --check-prefixes=CHECK,CHECK-SCALAR + +; Check that big fp constants can be rematerialized with movi +target triple = "aarch64-unknown-linux-gnu" + +; float foo(void) { return float(2147483648); } +define float @foo() { +; CHECK-LABEL: foo: +; CHECK: // %bb.0: // %entry +; CHECK-NEON-NEXT: movi v0.2s, #79, lsl #24 +; CHECK-SCALAR-NEXT: mov w8, #1325400064 +; CHECK-SCALAR-NEXT: fmov s0, w8 +; CHECK-NEXT: ret +entry: + ret float 0x41E0000000000000 +} + +; float foo2(float p) { return p + float(2147483648); } +define float @foo2(float %f) { +; CHECK-LABEL: foo2: +; CHECK: // %bb.0: // %entry +; CHECK-NEON-NEXT: movi v1.2s, #79, lsl #24 +; CHECK-NEON-NEXT: fadd s0, s0, s1 +; CHECK-SCALAR-NEXT: mov w8, #1325400064 +; CHECK-SCALAR-NEXT: fmov s1, w8 +; CHECK-SCALAR-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: ret +entry: + %p = fadd float %f, 0x41E0000000000000 + ret float %p +} diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll @@ -48,8 +48,7 @@ define float @test_v3f32(<3 x float> %a) nounwind { ; CHECK-LABEL: test_v3f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-2147483648 -; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: movi v1.2s, #128, lsl #24 ; CHECK-NEXT: mov v0.s[3], v1.s[0] ; CHECK-NEXT: faddp v0.4s, v0.4s, v0.4s ; CHECK-NEXT: faddp s0, v0.2s