Index: llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp @@ -405,10 +405,9 @@ bool Is64Bit = (VT == MVT::f64); // This checks to see if we can use FMOV instructions to materialize // a constant, otherwise we have to materialize via the constant pool. - if (TLI.isFPImmLegal(Val, VT)) { - int Imm = - Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val); - assert((Imm != -1) && "Cannot encode floating-point constant."); + int Imm = + Is64Bit ? AArch64_AM::getFP64Imm(Val) : AArch64_AM::getFP32Imm(Val); + if (Imm != -1) { unsigned Opc = Is64Bit ? AArch64::FMOVDi : AArch64::FMOVSi; return fastEmitInst_i(Opc, TLI.getRegClassFor(VT), Imm); } Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -5424,34 +5424,30 @@ } bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { - // We can materialize #0.0 as fmov $Rd, XZR for 64-bit and 32-bit cases. - // FIXME: We should be able to handle f128 as well with a clever lowering. - if (Imm.isPosZero() && (VT == MVT::f64 || VT == MVT::f32 || - (VT == MVT::f16 && Subtarget->hasFullFP16()))) { - LLVM_DEBUG(dbgs() << "Legal " << VT.getEVTString() << " imm value: 0\n"); - return true; - } - bool IsLegal = false; - SmallString<128> ImmStrVal; - Imm.toString(ImmStrVal); - + // We can materialize #0.0 as fmov $Rd, XZR for 64-bit, 32-bit cases, and + // 16-bit case when target has full fp16 support. + // FIXME: We should be able to handle f128 as well with a clever lowering. 
+ const APInt ImmInt = Imm.bitcastToAPInt(); if (VT == MVT::f64) - IsLegal = AArch64_AM::getFP64Imm(Imm) != -1; + IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero(); else if (VT == MVT::f32) - IsLegal = AArch64_AM::getFP32Imm(Imm) != -1; + IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero(); else if (VT == MVT::f16 && Subtarget->hasFullFP16()) - IsLegal = AArch64_AM::getFP16Imm(Imm) != -1; + IsLegal = AArch64_AM::getFP16Imm(ImmInt) != -1 || Imm.isPosZero(); + // TODO: fmov h0, w0 is also legal, however we don't have an isel pattern to + // generate that fmov. + + // If we can not materialize in immediate field for fmov, check if the + // value can be encoded as the immediate operand of a logical instruction. + // The immediate value will be created with either MOVZ, MOVN, or ORR. + if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) + IsLegal = AArch64_AM::isAnyMOVWMovAlias(ImmInt.getZExtValue(), + VT.getSizeInBits()); - if (IsLegal) { - LLVM_DEBUG(dbgs() << "Legal " << VT.getEVTString() - << " imm value: " << ImmStrVal << "\n"); - return true; - } - - LLVM_DEBUG(dbgs() << "Illegal " << VT.getEVTString() - << " imm value: " << ImmStrVal << "\n"); - return false; + LLVM_DEBUG(dbgs() << (IsLegal ? 
"Legal " : "Illegal ") << VT.getEVTString() + << " imm value: "; Imm.dump();); + return IsLegal; } //===----------------------------------------------------------------------===// Index: llvm/trunk/test/CodeGen/AArch64/fabs.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/fabs.ll +++ llvm/trunk/test/CodeGen/AArch64/fabs.ll @@ -22,11 +22,11 @@ define float @still_not_fabs(float %x) #0 { ; CHECK-LABEL: still_not_fabs: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI1_0 -; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI1_0] -; CHECK-NEXT: fneg s2, s0 -; CHECK-NEXT: fcmp s0, s1 -; CHECK-NEXT: fcsel s0, s0, s2, ge +; CHECK-NEXT: orr w8, wzr, #0x80000000 +; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: fneg s1, s0 +; CHECK-NEXT: fcmp s0, s2 +; CHECK-NEXT: fcsel s0, s0, s1, ge ; CHECK-NEXT: ret %cmp = fcmp nnan oge float %x, -0.0 %sub = fsub nnan float -0.0, %x Index: llvm/trunk/test/CodeGen/AArch64/fadd-combines.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/fadd-combines.ll +++ llvm/trunk/test/CodeGen/AArch64/fadd-combines.ll @@ -132,13 +132,13 @@ define float @fadd_const_multiuse_fmf(float %x) { ; CHECK-LABEL: fadd_const_multiuse_fmf: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI10_0 -; CHECK-NEXT: adrp x9, .LCPI10_1 -; CHECK-NEXT: ldr s1, [x8, :lo12:.LCPI10_0] -; CHECK-NEXT: ldr s2, [x9, :lo12:.LCPI10_1] -; CHECK-NEXT: fadd s1, s0, s1 -; CHECK-NEXT: fadd s0, s0, s2 -; CHECK-NEXT: fadd s0, s1, s0 +; CHECK-DAG: mov [[W59:w[0-9]+]], #1114374144 +; CHECK-DAG: mov [[W42:w[0-9]+]], #1109917696 +; CHECK-DAG: fmov [[FP59:s[0-9]+]], [[W59]] +; CHECK-DAG: fmov [[FP42:s[0-9]+]], [[W42]] +; CHECK-NEXT: fadd [[TMP1:s[0-9]+]], s0, [[FP42]] +; CHECK-NEXT: fadd [[TMP2:s[0-9]+]], s0, [[FP59]] +; CHECK-NEXT: fadd s0, [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret %a1 = fadd float %x, 42.0 %a2 = fadd nsz reassoc float %a1, 17.0 @@ -153,13 +153,13 @@ define float @fadd_const_multiuse_attr(float 
%x) #0 { ; CHECK-LABEL: fadd_const_multiuse_attr: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x9, .LCPI11_1 -; CHECK-NEXT: adrp x8, .LCPI11_0 -; CHECK-NEXT: ldr s1, [x9, :lo12:.LCPI11_1] -; CHECK-NEXT: ldr s2, [x8, :lo12:.LCPI11_0] -; CHECK-NEXT: fadd s1, s0, s1 -; CHECK-NEXT: fadd s1, s2, s1 -; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-DAG: mov [[W59:w[0-9]+]], #1114374144 +; CHECK-DAG: mov [[W17:w[0-9]+]], #1109917696 +; CHECK-NEXT: fmov [[FP59:s[0-9]+]], [[W59]] +; CHECK-NEXT: fmov [[FP17:s[0-9]+]], [[W17]] +; CHECK-NEXT: fadd [[TMP1:s[0-9]+]], s0, [[FP59]] +; CHECK-NEXT: fadd [[TMP2:s[0-9]+]], [[FP17]], [[TMP1]] +; CHECK-NEXT: fadd s0, s0, [[TMP2]] ; CHECK-NEXT: ret %a1 = fadd float %x, 42.0 %a2 = fadd float %a1, 17.0 Index: llvm/trunk/test/CodeGen/AArch64/fpimm.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/fpimm.ll +++ llvm/trunk/test/CodeGen/AArch64/fpimm.ll @@ -18,8 +18,10 @@ %newval2 = fadd float %val, 128.0 store volatile float %newval2, float* @varf32 -; CHECK-DAG: ldr {{s[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.LCPI0_0 -; TINY-DAG: ldr {{s[0-9]+}}, .LCPI0_0 +; CHECK-DAG: mov [[W128:w[0-9]+]], #1124073472 +; CHECK-DAG: fmov {{s[0-9]+}}, [[W128]] +; TINY-DAG: mov [[W128:w[0-9]+]], #1124073472 +; TINY-DAG: fmov {{s[0-9]+}}, [[W128]] ; CHECK: ret ; TINY: ret @@ -38,8 +40,10 @@ %newval2 = fadd double %val, 128.0 store volatile double %newval2, double* @varf64 -; CHECK-DAG: ldr {{d[0-9]+}}, [{{x[0-9]+}}, {{#?}}:lo12:.LCPI1_0 -; TINY-DAG: ldr {{d[0-9]+}}, .LCPI1_0 +; CHECK-DAG: mov [[X128:x[0-9]+]], #4638707616191610880 +; CHECK-DAG: fmov {{d[0-9]+}}, [[X128]] +; TINY-DAG: mov [[X128:x[0-9]+]], #4638707616191610880 +; TINY-DAG: fmov {{d[0-9]+}}, [[X128]] ; CHECK: ret ; TINY: ret Index: llvm/trunk/test/CodeGen/AArch64/isinf.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/isinf.ll +++ llvm/trunk/test/CodeGen/AArch64/isinf.ll @@ -0,0 +1,62 @@ +; RUN: llc 
-mtriple=aarch64-none-linux-gnu -mattr=+neon,+fullfp16 < %s -o -| FileCheck %s + +declare half @llvm.fabs.f16(half) +declare float @llvm.fabs.f32(float) +declare double @llvm.fabs.f64(double) +declare fp128 @llvm.fabs.f128(fp128) + +; INFINITY requires loading the constant for _Float16 +define i32 @replace_isinf_call_f16(half %x) { +; CHECK-LABEL: replace_isinf_call_f16: +; CHECK: adrp [[ADDR:x[0-9]+]], [[CSTLABEL:.LCP.*]] +; CHECK: ldr [[INFINITY:h[0-9]+]], {{[[]}}[[ADDR]], :lo12:[[CSTLABEL]]{{[]]}} +; CHECK-NEXT: fabs [[ABS:h[0-9]+]], h0 +; CHECK-NEXT: fcmp [[ABS]], [[INFINITY]] +; CHECK-NEXT: cset w0, eq + %abs = tail call half @llvm.fabs.f16(half %x) + %cmpinf = fcmp oeq half %abs, 0xH7C00 + %ret = zext i1 %cmpinf to i32 + ret i32 %ret +} + +; Check if INFINITY for float is materialized +define i32 @replace_isinf_call_f32(float %x) { +; CHECK-LABEL: replace_isinf_call_f32: +; CHECK: orr [[INFSCALARREG:w[0-9]+]], wzr, #0x7f800000 +; CHECK-NEXT: fabs [[ABS:s[0-9]+]], s0 +; CHECK-NEXT: fmov [[INFREG:s[0-9]+]], [[INFSCALARREG]] +; CHECK-NEXT: fcmp [[ABS]], [[INFREG]] +; CHECK-NEXT: cset w0, eq + %abs = tail call float @llvm.fabs.f32(float %x) + %cmpinf = fcmp oeq float %abs, 0x7FF0000000000000 + %ret = zext i1 %cmpinf to i32 + ret i32 %ret +} + +; Check if INFINITY for double is materialized +define i32 @replace_isinf_call_f64(double %x) { +; CHECK-LABEL: replace_isinf_call_f64: +; CHECK: orr [[INFSCALARREG:x[0-9]+]], xzr, #0x7ff0000000000000 +; CHECK-NEXT: fabs [[ABS:d[0-9]+]], d0 +; CHECK-NEXT: fmov [[INFREG:d[0-9]+]], [[INFSCALARREG]] +; CHECK-NEXT: fcmp [[ABS]], [[INFREG]] +; CHECK-NEXT: cset w0, eq + %abs = tail call double @llvm.fabs.f64(double %x) + %cmpinf = fcmp oeq double %abs, 0x7FF0000000000000 + %ret = zext i1 %cmpinf to i32 + ret i32 %ret +} + +; For long double it still requires loading the constant. 
+define i32 @replace_isinf_call_f128(fp128 %x) { +; CHECK-LABEL: replace_isinf_call_f128: +; CHECK: adrp [[ADDR:x[0-9]+]], [[CSTLABEL:.LCP.*]] +; CHECK: ldr q1, {{[[]}}[[ADDR]], :lo12:[[CSTLABEL]]{{[]]}} +; CHECK: bl __eqtf2 +; CHECK: cmp w0, #0 +; CHECK: cset w0, eq + %abs = tail call fp128 @llvm.fabs.f128(fp128 %x) + %cmpinf = fcmp oeq fp128 %abs, 0xL00000000000000007FFF000000000000 + %ret = zext i1 %cmpinf to i32 + ret i32 %ret +} Index: llvm/trunk/test/CodeGen/AArch64/known-never-nan.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/known-never-nan.ll +++ llvm/trunk/test/CodeGen/AArch64/known-never-nan.ll @@ -28,13 +28,13 @@ define float @not_fmaxnm_maybe_nan(i32 %i1, i32 %i2) #0 { ; CHECK-LABEL: not_fmaxnm_maybe_nan: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI1_0 -; CHECK-NEXT: ldr s0, [x8, :lo12:.LCPI1_0] -; CHECK-NEXT: ucvtf s1, w0 -; CHECK-NEXT: ucvtf s2, w1 -; CHECK-NEXT: fmov s3, #17.00000000 -; CHECK-NEXT: fmul s0, s1, s0 -; CHECK-NEXT: fadd s1, s2, s3 +; CHECK-NEXT: orr w8, wzr, #0xff800000 +; CHECK-NEXT: ucvtf s0, w0 +; CHECK-NEXT: ucvtf s1, w1 +; CHECK-NEXT: fmov s2, #17.00000000 +; CHECK-NEXT: fmov s3, w8 +; CHECK-NEXT: fmul s0, s0, s3 +; CHECK-NEXT: fadd s1, s1, s2 ; CHECK-NEXT: fcmp s0, s1 ; CHECK-NEXT: fcsel s0, s0, s1, pl ; CHECK-NEXT: ret Index: llvm/trunk/test/CodeGen/AArch64/literal_pools_float.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/literal_pools_float.ll +++ llvm/trunk/test/CodeGen/AArch64/literal_pools_float.ll @@ -13,18 +13,16 @@ %floatval = load float, float* @varfloat %newfloat = fadd float %floatval, 128.0 -; CHECK: adrp x[[LITBASE:[0-9]+]], [[CURLIT:.LCPI[0-9]+_[0-9]+]] -; CHECK: ldr [[LIT128:s[0-9]+]], [x[[LITBASE]], {{#?}}:lo12:[[CURLIT]]] +; CHECK: mov [[W128:w[0-9]+]], #1124073472 +; CHECK: fmov [[LIT128:s[0-9]+]], [[W128]] ; CHECK-NOFP-NOT: ldr {{s[0-9]+}}, -; CHECK-TINY: ldr [[LIT128:s[0-9]+]], 
[[CURLIT:.LCPI[0-9]+_[0-9]+]] +; CHECK-TINY: mov [[W128:w[0-9]+]], #1124073472 +; CHECK-TINY: fmov [[LIT128:s[0-9]+]], [[W128]] ; CHECK-NOFP-TINY-NOT: ldr {{s[0-9]+}}, -; CHECK-LARGE: movz x[[LITADDR:[0-9]+]], #:abs_g0_nc:[[CURLIT:.LCPI[0-9]+_[0-9]+]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g1_nc:[[CURLIT]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g2_nc:[[CURLIT]] -; CHECK-LARGE: movk x[[LITADDR]], #:abs_g3:[[CURLIT]] -; CHECK-LARGE: ldr {{s[0-9]+}}, [x[[LITADDR]]] +; CHECK-LARGE: mov [[W128:w[0-9]+]], #1124073472 +; CHECK-LARGE: fmov [[LIT128:s[0-9]+]], [[W128]] ; CHECK-LARGE: fadd ; CHECK-NOFP-LARGE-NOT: ldr {{s[0-9]+}}, ; CHECK-NOFP-LARGE-NOT: fadd Index: llvm/trunk/test/CodeGen/AArch64/win_cst_pool.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/win_cst_pool.ll +++ llvm/trunk/test/CodeGen/AArch64/win_cst_pool.ll @@ -2,22 +2,22 @@ ; RUN: llc < %s -mtriple=aarch64-win32-gnu | FileCheck -check-prefix=MINGW %s define double @double() { - ret double 0x0000000000800000 + ret double 0x0000000000800001 } -; CHECK: .globl __real@0000000000800000 -; CHECK-NEXT: .section .rdata,"dr",discard,__real@0000000000800000 +; CHECK: .globl __real@0000000000800001 +; CHECK-NEXT: .section .rdata,"dr",discard,__real@0000000000800001 ; CHECK-NEXT: .p2align 3 -; CHECK-NEXT: __real@0000000000800000: -; CHECK-NEXT: .xword 8388608 +; CHECK-NEXT: __real@0000000000800001: +; CHECK-NEXT: .xword 8388609 ; CHECK: double: -; CHECK: adrp x8, __real@0000000000800000 -; CHECK-NEXT: ldr d0, [x8, __real@0000000000800000] +; CHECK: adrp x8, __real@0000000000800001 +; CHECK-NEXT: ldr d0, [x8, __real@0000000000800001] ; CHECK-NEXT: ret ; MINGW: .section .rdata,"dr" ; MINGW-NEXT: .p2align 3 ; MINGW-NEXT: [[LABEL:\.LC.*]]: -; MINGW-NEXT: .xword 8388608 +; MINGW-NEXT: .xword 8388609 ; MINGW: double: ; MINGW: adrp x8, [[LABEL]] ; MINGW-NEXT: ldr d0, [x8, [[LABEL]]]