Index: llvm/lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -640,6 +640,21 @@
 def fixedpoint_f32_i64 : fixedpoint_i64;
 def fixedpoint_f64_i64 : fixedpoint_i64;
 
+def fixedpoint_i64_literal : Operand<i64>, ImmLeaf<i64, [{
+  return ((uint32_t)Imm > 0) && (((uint32_t)Imm) < 65);
+}]> {
+  let EncoderMethod = "getFixedPointScaleOpValue";
+  let DecoderMethod = "DecodeFixedPointScaleImm64";
+  let ParserMatchClass = Imm1_64Operand;
+}
+def fixedpoint_i32_literal : Operand<i32>, ImmLeaf<i32, [{
+  return ((uint32_t)Imm > 0) && (((uint32_t)Imm) < 33);
+}]> {
+  let EncoderMethod = "getFixedPointScaleOpValue";
+  let DecoderMethod = "DecodeFixedPointScaleImm32";
+  let ParserMatchClass = Imm1_32Operand;
+}
+
 def vecshiftR8 : Operand<i32>, ImmLeaf<i32, [{
   return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
 }]> {
@@ -4652,9 +4667,8 @@
                              SDPatternOperator OpN> {
   // Scaled half-precision to 32-bit
   def SWHri : BaseFPToInteger<0b11, rmode, opcode, FPR16, GPR32,
-                              fixedpoint_f16_i32, asm,
-                              [(set GPR32:$Rd, (OpN (fmul (f16 FPR16:$Rn),
-                                                          fixedpoint_f16_i32:$scale)))]> {
+                              fixedpoint_i32_literal, asm,
+                              [(set GPR32:$Rd, (OpN (f16 FPR16:$Rn), fixedpoint_i32_literal:$scale))]> {
     let Inst{31} = 0; // 32-bit GPR flag
     let scale{5} = 1;
     let Predicates = [HasFullFP16];
@@ -4662,44 +4676,39 @@
 
   // Scaled half-precision to 64-bit
   def SXHri : BaseFPToInteger<0b11, rmode, opcode, FPR16, GPR64,
-                              fixedpoint_f16_i64, asm,
-                              [(set GPR64:$Rd, (OpN (fmul (f16 FPR16:$Rn),
-                                                          fixedpoint_f16_i64:$scale)))]> {
+                              fixedpoint_i64_literal, asm,
+                              [(set GPR64:$Rd, (OpN (f16 FPR16:$Rn), fixedpoint_i64_literal:$scale))]> {
     let Inst{31} = 1; // 64-bit GPR flag
     let Predicates = [HasFullFP16];
   }
 
   // Scaled single-precision to 32-bit
   def SWSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR32,
-                              fixedpoint_f32_i32, asm,
-                              [(set GPR32:$Rd, (OpN (fmul FPR32:$Rn,
-                                                          fixedpoint_f32_i32:$scale)))]> {
+                              fixedpoint_i32_literal, asm,
+                              [(set GPR32:$Rd, (OpN FPR32:$Rn, fixedpoint_i32_literal:$scale))]> {
     let Inst{31} = 0; // 32-bit GPR flag
     let scale{5} = 1;
   }
 
   // Scaled single-precision to 64-bit
   def SXSri : BaseFPToInteger<0b00, rmode, opcode, FPR32, GPR64,
-                              fixedpoint_f32_i64, asm,
-                              [(set GPR64:$Rd, (OpN (fmul FPR32:$Rn,
-                                                          fixedpoint_f32_i64:$scale)))]> {
+                              fixedpoint_i64_literal, asm,
+                              [(set GPR64:$Rd, (OpN FPR32:$Rn, fixedpoint_i64_literal:$scale))]> {
     let Inst{31} = 1; // 64-bit GPR flag
   }
 
   // Scaled double-precision to 32-bit
   def SWDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR32,
-                              fixedpoint_f64_i32, asm,
-                              [(set GPR32:$Rd, (OpN (fmul FPR64:$Rn,
-                                                          fixedpoint_f64_i32:$scale)))]> {
+                              fixedpoint_i32_literal, asm,
+                              [(set GPR32:$Rd, (OpN (f64 FPR64:$Rn), fixedpoint_i32_literal:$scale))]> {
     let Inst{31} = 0; // 32-bit GPR flag
     let scale{5} = 1;
   }
 
   // Scaled double-precision to 64-bit
   def SXDri : BaseFPToInteger<0b01, rmode, opcode, FPR64, GPR64,
-                              fixedpoint_f64_i64, asm,
-                              [(set GPR64:$Rd, (OpN (fmul FPR64:$Rn,
-                                                          fixedpoint_f64_i64:$scale)))]> {
+                              fixedpoint_i64_literal, asm,
+                              [(set GPR64:$Rd, (OpN (f64 FPR64:$Rn), fixedpoint_i64_literal:$scale))]> {
     let Inst{31} = 1; // 64-bit GPR flag
   }
 }
@@ -4709,11 +4718,11 @@
 //---
 
 let mayStore = 0, mayLoad = 0, hasSideEffects = 0, mayRaiseFPException = 1 in
-class BaseIntegerToFP<bit isUnsigned,
-                      RegisterClass srcType, RegisterClass dstType,
-                      Operand immType, string asm, list<dag> pattern>
+class BaseIntegerToFPScaled<bit isUnsigned,
+                            RegisterClass srcType, RegisterClass dstType,
+                            ValueType dvt, Operand immType, string asm, SDPatternOperator node>
     : I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale),
-         asm, "\t$Rd, $Rn, $scale", "", pattern>,
+         asm, "\t$Rd, $Rn, $scale", "", [(set (dvt dstType:$Rd), (node srcType:$Rn, immType:$scale))]>,
      Sched<[WriteFCvt]> {
  bits<5> Rd;
  bits<5> Rn;
@@ -4744,8 +4753,7 @@
   let Inst{4-0}   = Rd;
 }
 
-multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
-  // Unscaled
+multiclass IntegerToFPUnscaled<bit isUnsigned, string asm, SDPatternOperator node> {
   def UWHri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR16, f16, asm, node> {
     let Inst{31} = 0; // 32-bit GPR flag
     let Inst{23-22} = 0b11; // 16-bit FPR flag
@@ -4777,57 +4785,40 @@
     let Inst{31} = 1; // 64-bit GPR flag
     let Inst{23-22} = 0b01; // 64-bit FPR flag
   }
+}
 
-  // Scaled
-  def SWHri: BaseIntegerToFP<isUnsigned, GPR32, FPR16,
-                             fixedpoint_f16_i32, asm,
-                             [(set (f16 FPR16:$Rd),
-                                   (fdiv (node GPR32:$Rn),
-                                         fixedpoint_f16_i32:$scale))]> {
+multiclass IntegerToFPScaled<bit isUnsigned, string asm, SDPatternOperator node> {
+  def SWHri: BaseIntegerToFPScaled<isUnsigned, GPR32, FPR16, f16,
+                                   fixedpoint_i32_literal, asm, node> {
     let Inst{31} = 0; // 32-bit GPR flag
     let Inst{23-22} = 0b11; // 16-bit FPR flag
     let scale{5} = 1;
     let Predicates = [HasFullFP16];
   }
-  def SWSri: BaseIntegerToFP<isUnsigned, GPR32, FPR32,
-                             fixedpoint_f32_i32, asm,
-                             [(set FPR32:$Rd,
-                                   (fdiv (node GPR32:$Rn),
-                                         fixedpoint_f32_i32:$scale))]> {
+  def SWSri: BaseIntegerToFPScaled<isUnsigned, GPR32, FPR32, f32,
+                                   fixedpoint_i32_literal, asm, node> {
     let Inst{31} = 0; // 32-bit GPR flag
     let Inst{23-22} = 0b00; // 32-bit FPR flag
     let scale{5} = 1;
   }
-  def SWDri: BaseIntegerToFP<isUnsigned, GPR32, FPR64,
-                             fixedpoint_f64_i32, asm,
-                             [(set FPR64:$Rd,
-                                   (fdiv (node GPR32:$Rn),
-                                         fixedpoint_f64_i32:$scale))]> {
+  def SWDri: BaseIntegerToFPScaled<isUnsigned, GPR32, FPR64, f64,
+                                   fixedpoint_i32_literal, asm, node> {
     let Inst{31} = 0; // 32-bit GPR flag
     let Inst{23-22} = 0b01; // 64-bit FPR flag
     let scale{5} = 1;
   }
-  def SXHri: BaseIntegerToFP<isUnsigned, GPR64, FPR16,
-                             fixedpoint_f16_i64, asm,
-                             [(set (f16 FPR16:$Rd),
-                                   (fdiv (node GPR64:$Rn),
-                                         fixedpoint_f16_i64:$scale))]> {
+  def SXHri: BaseIntegerToFPScaled<isUnsigned, GPR64, FPR16, f16,
+                                   fixedpoint_i64_literal, asm, node> {
     let Inst{31} = 1; // 64-bit GPR flag
     let Inst{23-22} = 0b11; // 16-bit FPR flag
     let Predicates = [HasFullFP16];
   }
-  def SXSri: BaseIntegerToFP<isUnsigned, GPR64, FPR32,
-                             fixedpoint_f32_i64, asm,
-                             [(set FPR32:$Rd,
-                                   (fdiv (node GPR64:$Rn),
-                                         fixedpoint_f32_i64:$scale))]> {
+  def SXSri: BaseIntegerToFPScaled<isUnsigned, GPR64, FPR32, f32,
+                                   fixedpoint_i64_literal, asm, node> {
     let Inst{31} = 1; // 64-bit GPR flag
     let Inst{23-22} = 0b00; // 32-bit FPR flag
   }
-  def SXDri: BaseIntegerToFP<isUnsigned, GPR64, FPR64,
-                             fixedpoint_f64_i64, asm,
-                             [(set FPR64:$Rd,
-                                   (fdiv (node GPR64:$Rn),
-                                         fixedpoint_f64_i64:$scale))]> {
+  def SXDri: BaseIntegerToFPScaled<isUnsigned, GPR64, FPR64, f64,
+                                   fixedpoint_i64_literal, asm, node> {
     let Inst{31} = 1; // 64-bit GPR flag
     let Inst{23-22} = 0b01; // 64-bit FPR flag
   }
 }
Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3864,8 +3864,8 @@
 defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>;
 defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
 defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
-defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
-defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
+defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
+defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
 
 // AArch64's FCVT instructions saturate when out of range.
 multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
@@ -3903,6 +3903,26 @@
 defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">;
 defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">;
 
+multiclass FPToIntegerScaledPats<SDPatternOperator to_int, string INST> {
+  let Predicates = [HasFullFP16] in {
+  def : Pat<(i32 (to_int (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
+            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
+  def : Pat<(i64 (to_int (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
+            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
+  }
+  def : Pat<(i32 (to_int (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
+            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
+  def : Pat<(i64 (to_int (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
+            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
+  def : Pat<(i32 (to_int (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
+            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
+  def : Pat<(i64 (to_int (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
+            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
+}
+
+defm : FPToIntegerScaledPats<any_fp_to_sint, "FCVTZS">;
+defm : FPToIntegerScaledPats<any_fp_to_uint, "FCVTZU">;
+
 multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
   let Predicates = [HasFullFP16] in {
   def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
@@ -3913,20 +3933,7 @@
   def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
   def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;
 
-  let Predicates = [HasFullFP16] in {
-  def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
-            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
-  def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
-            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
-  }
-  def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
-            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
-  def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
-            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
-  def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
-            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
-  def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
-            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
+  defm : FPToIntegerScaledPats<round, INST>;
 }
 
 defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
@@ -3995,13 +4002,36 @@
 // Scaled integer to floating point conversion instructions.
 //===----------------------------------------------------------------------===//
 
-defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>;
-defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;
+defm SCVTF : IntegerToFPScaled<0, "scvtf", int_aarch64_neon_vcvtfxs2fp>;
+defm UCVTF : IntegerToFPScaled<1, "ucvtf", int_aarch64_neon_vcvtfxu2fp>;
+
+multiclass IntegerToFPScaledPats<SDPatternOperator to_fp, string INST> {
+  let Predicates = [HasFullFP16] in {
+  def : Pat<(f16 (fdiv (to_fp i32:$Rn), fixedpoint_f16_i32:$scale)),
+            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
+  def : Pat<(f16 (fdiv (to_fp i64:$Rn), fixedpoint_f16_i64:$scale)),
+            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
+  }
+  def : Pat<(f32 (fdiv (to_fp i32:$Rn), fixedpoint_f32_i32:$scale)),
+            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
+  def : Pat<(f64 (fdiv (to_fp i32:$Rn), fixedpoint_f64_i32:$scale)),
+            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
+  def : Pat<(f32 (fdiv (to_fp i64:$Rn), fixedpoint_f32_i64:$scale)),
+            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
+  def : Pat<(f64 (fdiv (to_fp i64:$Rn), fixedpoint_f64_i64:$scale)),
+            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
+}
+
+defm : IntegerToFPScaledPats<any_sint_to_fp, "SCVTF">;
+defm : IntegerToFPScaledPats<any_uint_to_fp, "UCVTF">;
 
 //===----------------------------------------------------------------------===//
 // Unscaled integer to floating point conversion instruction.
 //===----------------------------------------------------------------------===//
+defm SCVTF : IntegerToFPUnscaled<0, "scvtf", any_sint_to_fp>;
+defm UCVTF : IntegerToFPUnscaled<1, "ucvtf", any_uint_to_fp>;
+
 defm FMOV : UnscaledConversion<"fmov">;
 
 // Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
Index: llvm/test/CodeGen/AArch64/fcvt-fixed.ll
===================================================================
--- llvm/test/CodeGen/AArch64/fcvt-fixed.ll
+++ llvm/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -2,6 +2,179 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-NO16
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+
+declare float @llvm.aarch64.neon.vcvtfxs2fp.f32.i32(i32, i32)
+declare float @llvm.aarch64.neon.vcvtfxs2fp.f32.i64(i64, i32)
+declare double @llvm.aarch64.neon.vcvtfxs2fp.f64.i32(i32, i32)
+declare double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64, i32)
+
+declare float @llvm.aarch64.neon.vcvtfxu2fp.f32.i32(i32, i32)
+declare float @llvm.aarch64.neon.vcvtfxu2fp.f32.i64(i64, i32)
+declare double @llvm.aarch64.neon.vcvtfxu2fp.f64.i32(i32, i32)
+declare double @llvm.aarch64.neon.vcvtfxu2fp.f64.i64(i64, i32)
+
+declare i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f32(float, i32)
+declare i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f32(float, i32)
+declare i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f64(double, i32)
+declare i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f64(double, i32)
+
+declare i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f32(float, i32)
+declare i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f32(float, i32)
+declare i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f64(double, i32)
+declare i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f64(double, i32)
+
+; fptosi
+
+define i32 @fcvtzs_f32_i32_int(float %flt) {
+; CHECK-LABEL: fcvtzs_f32_i32_int:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs w0, s0, #1
+; CHECK-NEXT: ret
+  %cvt = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f32(float %flt, i32 1)
+  ret i32 %cvt
+}
+
+define i64 @fcvtzs_f32_i64_int(float %flt) {
+; CHECK-LABEL: fcvtzs_f32_i64_int:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs x0, s0, #1
+; CHECK-NEXT: ret
+  %cvt = tail call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f32(float %flt, i32 1)
+  ret i64 %cvt
+}
+
+define i32 @fcvtzs_f64_i32_int(double %dbl) {
+; CHECK-LABEL: fcvtzs_f64_i32_int:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs w0, d0, #1
+; CHECK-NEXT: ret
+  %cvt = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f64(double %dbl, i32 1)
+  ret i32 %cvt
+}
+
+define i64 @fcvtzs_f64_i64_int(double %dbl) {
+; CHECK-LABEL: fcvtzs_f64_i64_int:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzs x0, d0, #1
+; CHECK-NEXT: ret
+  %cvt = tail call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f64(double %dbl, i32 1)
+  ret i64 %cvt
+}
+
+; fptoui
+
+define i32 @fcvtzu_f32_i32_int(float %flt) {
+; CHECK-LABEL: fcvtzu_f32_i32_int:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu w0, s0, #1
+; CHECK-NEXT: ret
+  %cvt = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f32(float %flt, i32 1)
+  ret i32 %cvt
+}
+
+define i64 @fcvtzu_f32_i64_int(float %flt) {
+; CHECK-LABEL: fcvtzu_f32_i64_int:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu x0, s0, #1
+; CHECK-NEXT: ret
+  %cvt = tail call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f32(float %flt, i32 1)
+  ret i64 %cvt
+}
+
+define i32 @fcvtzu_f64_i32_int(double %dbl) {
+; CHECK-LABEL: fcvtzu_f64_i32_int:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu w0, d0, #1
+; CHECK-NEXT: ret
+  %cvt = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f64(double %dbl, i32 1)
+  ret i32 %cvt
+}
+
+define i64 @fcvtzu_f64_i64_int(double %dbl) {
+; CHECK-LABEL: fcvtzu_f64_i64_int:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fcvtzu x0, d0, #1
+; CHECK-NEXT: ret
+  %cvt = tail call i64 @llvm.aarch64.neon.vcvtfp2fxu.i64.f64(double %dbl, i32 1)
+  ret i64 %cvt
+}
+
+; sitofp
+
+define float @scvtf_f32_i32_int(i32 %int) {
+; CHECK-LABEL: scvtf_f32_i32_int:
+; CHECK: // %bb.0:
+; CHECK-NEXT: scvtf s0, w0, #1
+; CHECK-NEXT: ret
+  %cvt = tail call float @llvm.aarch64.neon.vcvtfxs2fp.f32.i32(i32 %int, i32 1)
+  ret float %cvt
+}
+
+define float @scvtf_f32_i64_int(i64 %long) {
+; CHECK-LABEL: scvtf_f32_i64_int:
+; CHECK: // %bb.0:
+; CHECK-NEXT: scvtf s0, x0, #1
+; CHECK-NEXT: ret
+  %cvt = tail call float @llvm.aarch64.neon.vcvtfxs2fp.f32.i64(i64 %long, i32 1)
+  ret float %cvt
+}
+
+define double @scvtf_f64_i32_int(i32 %int) {
+; CHECK-LABEL: scvtf_f64_i32_int:
+; CHECK: // %bb.0:
+; CHECK-NEXT: scvtf d0, w0, #1
+; CHECK-NEXT: ret
+  %cvt = tail call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i32(i32 %int, i32 1)
+  ret double %cvt
+}
+
+define double @scvtf_f64_i64_int(i64 %long) {
+; CHECK-LABEL: scvtf_f64_i64_int:
+; CHECK: // %bb.0:
+; CHECK-NEXT: scvtf d0, x0, #1
+; CHECK-NEXT: ret
+  %cvt = tail call double @llvm.aarch64.neon.vcvtfxs2fp.f64.i64(i64 %long, i32 1)
+  ret double %cvt
+}
+
+; uitofp
+
+define float @ucvtf_f32_i32_int(i32 %int) {
+; CHECK-LABEL: ucvtf_f32_i32_int:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ucvtf s0, w0, #1
+; CHECK-NEXT: ret
+  %cvt = tail call float @llvm.aarch64.neon.vcvtfxu2fp.f32.i32(i32 %int, i32 1)
+  ret float %cvt
+}
+
+define float @ucvtf_f32_i64_int(i64 %long) {
+; CHECK-LABEL: ucvtf_f32_i64_int:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ucvtf s0, x0, #1
+; CHECK-NEXT: ret
+  %cvt = tail call float @llvm.aarch64.neon.vcvtfxu2fp.f32.i64(i64 %long, i32 1)
+  ret float %cvt
+}
+
+define double @ucvtf_f64_i32_int(i32 %int) {
+; CHECK-LABEL: ucvtf_f64_i32_int:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ucvtf d0, w0, #1
+; CHECK-NEXT: ret
+  %cvt = tail call double @llvm.aarch64.neon.vcvtfxu2fp.f64.i32(i32 %int, i32 1)
+  ret double %cvt
+}
+
+define double @ucvtf_f64_i64_int(i64 %long) {
+; CHECK-LABEL: ucvtf_f64_i64_int:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ucvtf d0, x0, #1
+; CHECK-NEXT: ret
+  %cvt = tail call double @llvm.aarch64.neon.vcvtfxu2fp.f64.i64(i64 %long, i32 1)
+  ret double %cvt
+}
+
 ; fptoui
 
 define i32 @fcvtzs_f32_i32_7(float %flt) {
Index: llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll
===================================================================
--- llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll
+++ llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll
@@ -160,8 +160,7 @@
 
 define dso_local half @test_vcvth_n_f16_s32_1(i32 %a) {
 ; CHECK-LABEL: test_vcvth_n_f16_s32_1:
-; CHECK: fmov s0, w0
-; CHECK-NEXT: scvtf h0, h0, #1
+; CHECK: scvtf h0, w0, #1
 ; CHECK-NEXT: ret
 entry:
   %vcvth_n_f16_s32 = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 1)
@@ -170,8 +169,7 @@
 
 define dso_local half @test_vcvth_n_f16_s32_16(i32 %a) {
 ; CHECK-LABEL: test_vcvth_n_f16_s32_16:
-; CHECK: fmov s0, w0
-; CHECK-NEXT: scvtf h0, h0, #16
+; CHECK: scvtf h0, w0, #16
 ; CHECK-NEXT: ret
 entry:
   %vcvth_n_f16_s32 = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 16)
@@ -180,8 +178,7 @@
 
 define dso_local i16 @test_vcvth_n_s16_f16_1(half %a) {
 ; CHECK-LABEL: test_vcvth_n_s16_f16_1:
-; CHECK: fcvtzs h0, h0, #1
-; CHECK-NEXT: fmov w0, s0
+; CHECK: fcvtzs w0, h0, #1
 ; CHECK-NEXT: ret
 entry:
   %fcvth_n
 = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 1)
@@ -191,8 +188,7 @@
 
 define dso_local i16 @test_vcvth_n_s16_f16_16(half %a) {
 ; CHECK-LABEL: test_vcvth_n_s16_f16_16:
-; CHECK: fcvtzs h0, h0, #16
-; CHECK-NEXT: fmov w0, s0
+; CHECK: fcvtzs w0, h0, #16
 ; CHECK-NEXT: ret
 entry:
   %fcvth_n = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 16)
@@ -202,8 +198,7 @@
 
 define dso_local i32 @test_vcvth_n_s32_f16_1(half %a) {
 ; CHECK-LABEL: test_vcvth_n_s32_f16_1:
-; CHECK: fcvtzs h0, h0, #1
-; CHECK-NEXT: fmov w0, s0
+; CHECK: fcvtzs w0, h0, #1
 ; CHECK-NEXT: ret
 entry:
   %vcvth_n_s32_f16 = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 1)
@@ -212,8 +207,7 @@
 
 define dso_local i32 @test_vcvth_n_s32_f16_16(half %a) {
 ; CHECK-LABEL: test_vcvth_n_s32_f16_16:
-; CHECK: fcvtzs h0, h0, #16
-; CHECK-NEXT: fmov w0, s0
+; CHECK: fcvtzs w0, h0, #16
 ; CHECK-NEXT: ret
 entry:
   %vcvth_n_s32_f16 = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 16)
@@ -222,8 +216,7 @@
 
 define dso_local i64 @test_vcvth_n_s64_f16_1(half %a) {
 ; CHECK-LABEL: test_vcvth_n_s64_f16_1:
-; CHECK: fcvtzs h0, h0, #1
-; CHECK-NEXT: fmov x0, d0
+; CHECK: fcvtzs x0, h0, #1
 ; CHECK-NEXT: ret
 entry:
   %vcvth_n_s64_f16 = tail call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f16(half %a, i32 1)
@@ -232,8 +225,7 @@
 
 define dso_local i64 @test_vcvth_n_s64_f16_32(half %a) {
 ; CHECK-LABEL: test_vcvth_n_s64_f16_32:
-; CHECK: fcvtzs h0, h0, #32
-; CHECK-NEXT: fmov x0, d0
+; CHECK: fcvtzs x0, h0, #32
 ; CHECK-NEXT: ret
 entry:
   %vcvth_n_s64_f16 = tail call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f16(half %a, i32 32)
@@ -262,8 +254,7 @@
 
 define dso_local half @test_vcvth_n_f16_u32_1(i32 %a) {
 ; CHECK-LABEL: test_vcvth_n_f16_u32_1:
-; CHECK: fmov s0, w0
-; CHECK-NEXT: ucvtf h0, h0, #1
+; CHECK: ucvtf h0, w0, #1
 ; CHECK-NEXT: ret
 entry:
   %vcvth_n_f16_u32 = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %a, i32 1)
@@ -272,7 +263,7 @@
 
 define dso_local half @test_vcvth_n_f16_u32_16(i32 %a) {
 ; CHECK-LABEL: test_vcvth_n_f16_u32_16:
-; CHECK: ucvtf h0, h0, #16
+; CHECK: ucvtf h0, w0, #16
 ; CHECK-NEXT: ret
 entry:
   %vcvth_n_f16_u32 = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %a, i32 16)
@@ -281,8 +272,7 @@
 
 define dso_local i16 @test_vcvth_n_u16_f16_1(half %a) {
 ; CHECK-LABEL: test_vcvth_n_u16_f16_1:
-; CHECK: fcvtzu h0, h0, #1
-; CHECK-NEXT: fmov w0, s0
+; CHECK: fcvtzu w0, h0, #1
 ; CHECK-NEXT: ret
 entry:
   %fcvth_n = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 1)
@@ -292,8 +282,7 @@
 
 define dso_local i16 @test_vcvth_n_u16_f16_16(half %a) {
 ; CHECK-LABEL: test_vcvth_n_u16_f16_16:
-; CHECK: fcvtzu h0, h0, #16
-; CHECK-NEXT: fmov w0, s0
+; CHECK: fcvtzu w0, h0, #16
 ; CHECK-NEXT: ret
 entry:
   %fcvth_n = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 16)
@@ -303,8 +292,7 @@
 
 define dso_local i32 @test_vcvth_n_u32_f16_1(half %a) {
 ; CHECK-LABEL: test_vcvth_n_u32_f16_1:
-; CHECK: fcvtzu h0, h0, #1
-; CHECK-NEXT: fmov w0, s0
+; CHECK: fcvtzu w0, h0, #1
 ; CHECK-NEXT: ret
 entry:
   %vcvth_n_u32_f16 = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 1)
@@ -313,8 +301,7 @@
 
 define dso_local i32 @test_vcvth_n_u32_f16_16(half %a) {
 ; CHECK-LABEL: test_vcvth_n_u32_f16_16:
-; CHECK: fcvtzu h0, h0, #16
-; CHECK-NEXT: fmov w0, s0
+; CHECK: fcvtzu w0, h0, #16
 ; CHECK-NEXT: ret
 entry:
   %vcvth_n_u32_f16 = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 16)
@@ -345,8 +332,7 @@
 
 define dso_local half @vcvth_n_f16_s64_test(i64 %a) {
 ; CHECK-LABEL: vcvth_n_f16_s64_test:
-; CHECK: fmov d0, x0
-; CHECK-NEXT: scvtf h0, h0, #16
+; CHECK: scvtf h0, x0, #16
 ; CHECK-NEXT: ret
 entry:
   %vcvth_n_f16_s64 = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64 %a, i32 16)