diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -1254,7 +1254,9 @@ switch (MI.getOpcode()) { default: llvm_unreachable("Unexpected opcode"); case AArch64::FMOVH0: - FMov.setOpcode(AArch64::FMOVWHr); + FMov.setOpcode(STI->hasFullFP16() ? AArch64::FMOVWHr : AArch64::FMOVWSr); + if (!STI->hasFullFP16()) + DestReg = (AArch64::S0 + (DestReg - AArch64::H0)); FMov.addOperand(MCOperand::createReg(DestReg)); FMov.addOperand(MCOperand::createReg(AArch64::WZR)); break; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9772,14 +9772,16 @@ IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero(); else if (VT == MVT::f32) IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero(); - else if (VT == MVT::f16 && Subtarget->hasFullFP16()) - IsLegal = AArch64_AM::getFP16Imm(ImmInt) != -1 || Imm.isPosZero(); - // TODO: fmov h0, w0 is also legal, however on't have an isel pattern to - // generate that fmov. + else if (VT == MVT::f16) + IsLegal = + (Subtarget->hasFullFP16() && AArch64_AM::getFP16Imm(ImmInt) != -1) || + Imm.isPosZero(); // If we can not materialize in immediate field for fmov, check if the // value can be encoded as the immediate operand of a logical instruction. // The immediate value will be created with either MOVZ, MOVN, or ORR. + // TODO: fmov h0, w0 is also legal, however we don't have an isel pattern to + // generate that fmov. if (!IsLegal && (VT == MVT::f64 || VT == MVT::f32)) { // The cost is actually exactly the same for mov+fmov vs. adrp+ldr; // however the mov+fmov sequence is always better because of the reduced diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -4204,7 +4204,7 @@ // Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1 in { def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>, - Sched<[WriteF]>, Requires<[HasFullFP16]>; + Sched<[WriteF]>; def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>, Sched<[WriteF]>; def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>, diff --git a/llvm/test/CodeGen/AArch64/arm64-fmax.ll b/llvm/test/CodeGen/AArch64/arm64-fmax.ll --- a/llvm/test/CodeGen/AArch64/arm64-fmax.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fmax.ll @@ -73,8 +73,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $h0 killed $h0 def $s0 ; CHECK-NEXT: fcvt s1, h0 -; CHECK-NEXT: adrp x8, .LCPI5_0 -; CHECK-NEXT: ldr h2, [x8, :lo12:.LCPI5_0] +; CHECK-NEXT: movi d2, #0000000000000000 ; CHECK-NEXT: fcmp s1, #0.0 ; CHECK-NEXT: fcsel s0, s0, s2, lt ; CHECK-NEXT: fcvt s0, h0 diff --git a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll --- a/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll +++ b/llvm/test/CodeGen/AArch64/arm64-zero-cycle-zeroing.ll @@ -19,7 +19,7 @@ entry: ; ALL-LABEL: t1: ; ALL-NOT: fmov -; NONEFP-DAG: ldr h0,{{.*}} +; NONEFP-DAG: fmov s0, wzr ; NONEFP-DAG: fmov s1, wzr ; NONEFP-DAG: fmov d2, xzr ; NONEFP-DAG: movi{{(.16b)?}} v3{{(.2d)?}}, #0 @@ -27,7 +27,7 @@ ; NONE16: fmov s1, wzr ; NONE16: fmov d2, xzr ; NONE16: movi{{(.16b)?}} v3{{(.2d)?}}, #0 -; ZEROFP-DAG: ldr h0,{{.*}} +; ZEROFP-DAG: movi d0, #0 ; ZEROFP-DAG: movi d1, #0 ; ZEROFP-DAG: movi d2, #0 ; ZEROFP-DAG: movi v3.2d, #0 diff --git a/llvm/test/CodeGen/AArch64/f16-imm.ll b/llvm/test/CodeGen/AArch64/f16-imm.ll --- a/llvm/test/CodeGen/AArch64/f16-imm.ll +++ b/llvm/test/CodeGen/AArch64/f16-imm.ll @@ -16,8 +16,7 @@ ; ; CHECK-NOFP16-LABEL: Const0: ; CHECK-NOFP16: // %bb.0: // %entry -; CHECK-NOFP16-NEXT: adrp x8, .LCPI0_0 -; CHECK-NOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI0_0] +; CHECK-NOFP16-NEXT: movi d0, #0000000000000000 ; CHECK-NOFP16-NEXT: ret entry: ret half 0xH0000 diff --git a/llvm/test/CodeGen/AArch64/mattr-all.ll b/llvm/test/CodeGen/AArch64/mattr-all.ll --- a/llvm/test/CodeGen/AArch64/mattr-all.ll +++ b/llvm/test/CodeGen/AArch64/mattr-all.ll @@ -6,8 +6,7 @@ define half @bf16() nounwind { ; CHECK-LABEL: bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 -; CHECK-NEXT: ldr h0, [x8, :lo12:.LCPI0_0] +; CHECK-NEXT: movi d0, #0000000000000000 ; CHECK-NEXT: ret ret half 0xH0000 } diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll @@ -294,9 +294,8 @@ ; ; CHECKNOFP16-LABEL: fadd_reduction_v4f16_in_loop: ; CHECKNOFP16: // %bb.0: // %entry -; CHECKNOFP16-NEXT: adrp x9, .LCPI10_0 +; CHECKNOFP16-NEXT: movi d0, #0000000000000000 ; CHECKNOFP16-NEXT: mov x8, xzr -; CHECKNOFP16-NEXT: ldr h0, [x9, :lo12:.LCPI10_0] ; CHECKNOFP16-NEXT: .LBB10_1: // %loop ; CHECKNOFP16-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECKNOFP16-NEXT: ldr d1, [x0, x8] @@ -365,9 +364,8 @@ ; ; CHECKNOFP16-LABEL: fadd_reduction_v8f16_in_loop: ; CHECKNOFP16: // %bb.0: // %entry -; CHECKNOFP16-NEXT: adrp x9, .LCPI11_0 +; CHECKNOFP16-NEXT: movi d0, #0000000000000000 ; CHECKNOFP16-NEXT: mov x8, xzr -; CHECKNOFP16-NEXT: ldr h0, [x9, :lo12:.LCPI11_0] ; CHECKNOFP16-NEXT: .LBB11_1: // %loop ; CHECKNOFP16-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECKNOFP16-NEXT: ldr q1, [x0, x8]