Index: lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- lib/Target/AArch64/AArch64ISelLowering.cpp +++ lib/Target/AArch64/AArch64ISelLowering.cpp @@ -17,6 +17,7 @@ #include "AArch64RegisterInfo.h" #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" +#include "MCTargetDesc/AArch64ExpandImm.h" #include "Utils/AArch64BaseInfo.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" @@ -5410,9 +5411,18 @@ // If we can not materialize in immediate field for fmov, check if the // value can be encoded as the immediate operand of a logical instruction. // The immediate value will be created with either MOVZ, MOVN, or ORR. - if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) - IsLegal = AArch64_AM::isAnyMOVWMovAlias(ImmInt.getZExtValue(), - VT.getSizeInBits()); + if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) { + // The cost is actually exactly the same for mov+fmov vs. adrp+ldr; + // however the mov+fmov sequence is always better because of the reduced + // cache pressure. The timings are still the same if you consider + // movw+movk+fmov vs. adrp+ldr (it's one instruction longer, but the + // movw+movk is fused). So we limit up to 2 instructions at most. + SmallVector Insn; + AArch64_IMM::expandMOVImm(ImmInt.getZExtValue(), VT.getSizeInBits(), + Insn); + unsigned Limit = (OptForSize ? 1 : (Subtarget->hasFuseLiterals() ? 5 : 2)); + IsLegal = Insn.size() <= Limit; + } LLVM_DEBUG(dbgs() << (IsLegal ? 
"Legal " : "Illegal ") << VT.getEVTString() << " imm value: "; Imm.dump();); Index: test/CodeGen/AArch64/arm64-fp-imm-size.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/arm64-fp-imm-size.ll @@ -0,0 +1,40 @@ +; RUN: llc < %s -mtriple=arm64-apple-darwin | FileCheck %s + +; CHECK: literal8 +; CHECK: .quad 4614256656552045848 +define double @foo() optsize { +; CHECK: _foo: +; CHECK: adrp x[[REG:[0-9]+]], lCPI0_0@PAGE +; CHECK: ldr d0, [x[[REG]], lCPI0_0@PAGEOFF] +; CHECK-NEXT: ret + ret double 0x400921FB54442D18 +} + +; CHECK: literal8 +; CHECK: .quad 137438953409 +define double @foo2() optsize { +; CHECK: _foo2: +; CHECK: adrp x[[REG:[0-9]+]], lCPI1_0@PAGE +; CHECK: ldr d0, [x[[REG]], lCPI1_0@PAGEOFF] +; CHECK-NEXT: ret + ret double 0x1FFFFFFFC1 +} + +define float @bar() optsize { +; CHECK: _bar: +; CHECK: adrp x[[REG:[0-9]+]], lCPI2_0@PAGE +; CHECK: ldr s0, [x[[REG]], lCPI2_0@PAGEOFF] +; CHECK-NEXT: ret + ret float 0x400921FB60000000 +} + +; CHECK: literal16 +; CHECK: .quad 0 +; CHECK: .quad 0 +define fp128 @baz() optsize { +; CHECK: _baz: +; CHECK: adrp x[[REG:[0-9]+]], lCPI3_0@PAGE +; CHECK: ldr q0, [x[[REG]], lCPI3_0@PAGEOFF] +; CHECK-NEXT: ret + ret fp128 0xL00000000000000000000000000000000 +} Index: test/CodeGen/AArch64/arm64-fp-imm.ll =================================================================== --- test/CodeGen/AArch64/arm64-fp-imm.ll +++ test/CodeGen/AArch64/arm64-fp-imm.ll @@ -10,12 +10,11 @@ ret double 0x400921FB54442D18 } -; CHECK: literal4 -; CHECK: .long 1078530011 define float @bar() { ; CHECK: _bar: -; CHECK: adrp x[[REG:[0-9]+]], lCPI1_0@PAGE -; CHECK: ldr s0, [x[[REG]], lCPI1_0@PAGEOFF] +; CHECK: mov [[REG:w[0-9]+]], #4059 +; CHECK: movk [[REG]], #16457, lsl #16 +; CHECK: fmov s0, [[REG]] ; CHECK-NEXT: ret ret float 0x400921FB60000000 } Index: test/CodeGen/AArch64/fpimm.ll =================================================================== --- test/CodeGen/AArch64/fpimm.ll +++ 
test/CodeGen/AArch64/fpimm.ll @@ -45,6 +45,13 @@ ; TINY-DAG: mov [[X128:x[0-9]+]], #4638707616191610880 ; TINY-DAG: fmov {{d[0-9]+}}, [[X128]] +; 64-bit ORR followed by MOVK. +; CHECK-DAG: mov [[XFP0:x[0-9]+]], #1082331758844 +; CHECK-DAG: movk [[XFP0]], #64764, lsl #16 +; CHECK-DAG: fmov {{d[0-9]+}}, [[XFP0]] + %newval3 = fadd double %val, 0xFCFCFC00FC + store volatile double %newval3, double* @varf64 + ; CHECK: ret ; TINY: ret ret void @@ -54,8 +61,9 @@ ; LARGE: mov [[REG:w[0-9]+]], #4059 ; LARGE-NEXT: movk [[REG]], #16457, lsl #16 ; LARGE-NEXT: fmov s0, [[REG]] -; TINY-LABEL: check_float2 -; TINY: ldr s0, .LCPI2_0 +; TINY-LABEL: check_float2 +; TINY: mov [[REG:w[0-9]+]], #4059 +; TINY-NEXT: movk [[REG]], #16457, lsl #16 define float @check_float2() { ret float 3.14159274101257324218750 } Index: test/CodeGen/AArch64/literal_pools_float.ll =================================================================== --- test/CodeGen/AArch64/literal_pools_float.ll +++ test/CodeGen/AArch64/literal_pools_float.ll @@ -31,16 +31,19 @@ %doubleval = load double, double* @vardouble %newdouble = fadd double %doubleval, 129.0 -; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI[0-9]+_[0-9]+]] -; CHECK: ldr [[LIT129:d[0-9]+]], [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]] ; CHECK-NOFP-NOT: ldr {{d[0-9]+}}, +; CHECK: mov [[W129:x[0-9]+]], #35184372088832 +; CHECK: movk [[W129]], #16480, lsl #48 +; CHECK: fmov {{d[0-9]+}}, [[W129]] ; CHECK-NOFP-NOT: fadd -; CHECK-TINY: ldr [[LIT129:d[0-9]+]], [[CURLIT:.LCPI[0-9]+_[0-9]+]] +; CHECK-TINY: mov [[W129:x[0-9]+]], #35184372088832 +; CHECK-TINY: movk [[W129]], #16480, lsl #48 +; CHECK-TINY: fmov {{d[0-9]+}}, [[W129]] ; CHECK-NOFP-TINY-NOT: ldr {{d[0-9]+}}, ; CHECK-NOFP-TINY-NOT: fadd -; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g0_nc:[[CURLIT:.LCPI[0-9]+_[0-9]+]] +; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g0_nc:[[CURLIT:vardouble]] ; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]] ; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]] 
; CHECK-LARGE: movk x[[LITADDR]], #:abs_g3:[[CURLIT]] Index: test/CodeGen/AArch64/misched-fusion-lit.ll =================================================================== --- test/CodeGen/AArch64/misched-fusion-lit.ll +++ test/CodeGen/AArch64/misched-fusion-lit.ll @@ -46,3 +46,18 @@ ; CHECKDONT-NEXT: add {{x[0-9]+}}, {{x[0-9]+}}, {{x[0-9]+}} ; CHECKFUSE-NEXT: movk [[R]], {{#[0-9]+}}, lsl #48 } + +; Function Attrs: norecurse nounwind readnone +define double @litf() { +entry: + ret double 0x400921FB54442D18 + +; CHECK-LABEL: litf: +; CHECK-DONT: adrp [[ADDR:x[0-9]+]], [[CSTLABEL:.LCP.*]] +; CHECK-DONT-NEXT: ldr {{d[0-9]+}}, {{[[]}}[[ADDR]], :lo12:[[CSTLABEL]]{{[]]}} +; CHECK-FUSE: mov [[R:x[0-9]+]], #11544 +; CHECK-FUSE: movk [[R]], #21572, lsl #16 +; CHECK-FUSE: movk [[R]], #8699, lsl #32 +; CHECK-FUSE: movk [[R]], #16393, lsl #48 +; CHECK-FUSE: fmov {{d[0-9]+}}, [[R]] +} Index: test/CodeGen/AArch64/win_cst_pool.ll =================================================================== --- test/CodeGen/AArch64/win_cst_pool.ll +++ test/CodeGen/AArch64/win_cst_pool.ll @@ -2,22 +2,22 @@ ; RUN: llc < %s -mtriple=aarch64-win32-gnu | FileCheck -check-prefix=MINGW %s define double @double() { - ret double 0x0000000000800001 + ret double 0x2000000000800001 } -; CHECK: .globl __real@0000000000800001 -; CHECK-NEXT: .section .rdata,"dr",discard,__real@0000000000800001 +; CHECK: .globl __real@2000000000800001 +; CHECK-NEXT: .section .rdata,"dr",discard,__real@2000000000800001 ; CHECK-NEXT: .p2align 3 -; CHECK-NEXT: __real@0000000000800001: -; CHECK-NEXT: .xword 8388609 +; CHECK-NEXT: __real@2000000000800001: +; CHECK-NEXT: .xword 2305843009222082561 ; CHECK: double: -; CHECK: adrp x8, __real@0000000000800001 -; CHECK-NEXT: ldr d0, [x8, __real@0000000000800001] +; CHECK: adrp x8, __real@2000000000800001 +; CHECK-NEXT: ldr d0, [x8, __real@2000000000800001] ; CHECK-NEXT: ret ; MINGW: .section .rdata,"dr" ; MINGW-NEXT: .p2align 3 ; MINGW-NEXT: [[LABEL:\.LC.*]]: -; MINGW-NEXT: .xword 
8388609 +; MINGW-NEXT: .xword 2305843009222082561 ; MINGW: double: ; MINGW: adrp x8, [[LABEL]] ; MINGW-NEXT: ldr d0, [x8, [[LABEL]]]