Index: lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- lib/Target/AArch64/AArch64InstrFormats.td
+++ lib/Target/AArch64/AArch64InstrFormats.td
@@ -5979,8 +5979,7 @@
   def #NAME#32 : BaseSIMDThreeScalar;
   let Predicates = [HasNEON, HasFullFP16] in {
-  def #NAME#16 : BaseSIMDThreeScalar;
+  def #NAME#16 : BaseSIMDThreeScalar;
   } // Predicates = [HasNEON, HasFullFP16]
 }
@@ -7790,22 +7789,55 @@
 }
 
-multiclass SIMDFPScalarRShift<bit U, bits<5> opc, string asm> {
+multiclass SIMDFPScalarRShift<bit U, bits<5> opc, string asm,
+                              SDPatternOperator OpNode=null_frag> {
+  let Predicates = [HasNEON, HasFullFP16] in {
-  def h : BaseSIMDScalarShift {
+  def SHr : BaseSIMDScalarShift {
     let Inst{19-16} = imm{3-0};
+    let Inst{23-22} = 0b11;
   }
+  } // Predicates = [HasNEON, HasFullFP16]
+
+  def HDr : BaseSIMDScalarShift {
+    let Inst{21-16} = imm{5-0};
+    let Inst{23-22} = 0b11;
+  }
+
+  def DHr : BaseSIMDScalarShift {
+    let Inst{21-16} = imm{5-0};
+    let Inst{23-22} = 0b11;
+    let Inst{31} = 1;
+  }
   def s : BaseSIMDScalarShift {
+                              FPR32, FPR32, vecshiftR32, asm,
+                              []> {
     let Inst{20-16} = imm{4-0};
   }
   def d : BaseSIMDScalarShift {
+                              FPR64, FPR64, vecshiftR64, asm,
+                              []> {
     let Inst{21-16} = imm{5-0};
   }
+
+  let Predicates = [HasNEON, HasFullFP16] in {
+  def h : BaseSIMDScalarShift {
+    let Inst{19-16} = imm{3-0};
+  }
+  } // Predicates = [HasNEON, HasFullFP16]
+
 }
 
 multiclass SIMDScalarRShiftD<bit U, bits<5> opc, string asm,
Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -4866,10 +4866,10 @@
 //----------------------------------------------------------------------------
 // AdvSIMD scalar shift instructions
 //----------------------------------------------------------------------------
-defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
-defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
-defm SCVTF : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
-defm UCVTF : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;
+defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
+defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
+defm SCVTF : SIMDFPScalarRShift<0, 0b11100, "scvtf", int_aarch64_neon_vcvtfxs2fp>;
+defm UCVTF : SIMDFPScalarRShift<1, 0b11100, "ucvtf", int_aarch64_neon_vcvtfxu2fp>;
 // Codegen patterns for the above. We don't put these directly on the
 // instructions because TableGen's type inference can't handle the truth.
 // Having the same base pattern for fp <--> int totally freaks it out.
@@ -4887,17 +4887,29 @@
 def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn), vecshiftR64:$imm)),
           (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
-def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
-          (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
+def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
+          (SCVTFHDr (i64 FPR64:$Rn), vecshiftR16:$imm)>;
+def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu FPR16:$Rn, vecshiftR32:$imm)),
+          (FCVTZUSHr FPR16:$Rn, vecshiftR32:$imm)>;
+def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs FPR16:$Rn, vecshiftR32:$imm)),
+          (FCVTZSSHr FPR16:$Rn, vecshiftR32:$imm)>;
+def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
+          (FCVTZSDHr (f16 FPR16:$Rn), vecshiftR64:$imm)>;
+def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
+          (UCVTFh FPR32:$Rn, vecshiftR16:$imm)>;
 def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
           (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
-def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
-          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
 def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
           (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
 def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn), vecshiftR64:$imm)),
           (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
+def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
+          (SCVTFh FPR32:$Rn, vecshiftR16:$imm)>;
+def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR16:$imm)),
+          (SCVTFh FPR32:$Rn, vecshiftR16:$imm)>;
+def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
+          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
 def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn), vecshiftR64:$imm)),
           (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
Index: test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll
===================================================================
--- test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll
+++ test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=aarch64-eabi -mattr=+v8.2a,+fullfp16 | FileCheck %s
 
 declare half @llvm.aarch64.sisd.fabd.f16(half, half)
@@ -10,7 +11,8 @@
 
 define dso_local half @t_vabdh_f16(half %a, half %b) {
 ; CHECK-LABEL: t_vabdh_f16:
-; CHECK: fabd h0, h0, h1
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fabd h0, h0, h1
 ; CHECK-NEXT: ret
 entry:
   %vabdh_f16 = tail call half @llvm.aarch64.sisd.fabd.f16(half %a, half %b)
@@ -19,7 +21,8 @@
 
 define dso_local half @t_vabdh_f16_from_fsub_fabs(half %a, half %b) {
 ; CHECK-LABEL: t_vabdh_f16_from_fsub_fabs:
-; CHECK: fabd h0, h0, h1
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fabd h0, h0, h1
 ; CHECK-NEXT: ret
 entry:
   %sub = fsub half %a, %b
@@ -29,7 +32,8 @@
 
 define dso_local i16 @t_vceqh_f16(half %a, half %b) {
 ; CHECK-LABEL: t_vceqh_f16:
-; CHECK: fcmp h0, h1
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmp h0, h1
 ; CHECK-NEXT: csetm w0, eq
 ; CHECK-NEXT: ret
 entry:
@@ -40,7 +44,8 @@
 
 define dso_local i16 @t_vcgeh_f16(half %a, half %b) {
 ; CHECK-LABEL: t_vcgeh_f16:
-; CHECK: fcmp h0, h1
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmp h0, h1
 ; CHECK-NEXT: csetm w0, ge
 ; CHECK-NEXT: ret
 entry:
@@ -51,7 +56,8 @@
 
 define dso_local i16 @t_vcgth_f16(half %a, half %b) {
 ; CHECK-LABEL: t_vcgth_f16:
-; CHECK: fcmp h0, h1
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmp h0, h1
 ; CHECK-NEXT: csetm w0, gt
 ; CHECK-NEXT: ret
 entry:
@@ -62,7 +68,8 @@
 
 define dso_local i16 @t_vcleh_f16(half %a, half %b) {
 ; CHECK-LABEL: t_vcleh_f16:
-; CHECK: fcmp h0, h1
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmp h0, h1
 ; CHECK-NEXT: csetm w0, ls
 ; CHECK-NEXT: ret
 entry:
@@ -73,7 +80,8 @@
 
 define dso_local i16 @t_vclth_f16(half %a, half %b) {
 ; CHECK-LABEL: t_vclth_f16:
-; CHECK: fcmp h0, h1
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcmp h0, h1
 ; CHECK-NEXT: csetm w0, mi
 ; CHECK-NEXT: ret
 entry:
@@ -84,7 +92,8 @@
 
 define dso_local half @t_vmaxh_f16(half %a, half %b) {
 ; CHECK-LABEL: t_vmaxh_f16:
-; CHECK: fmax h0, h0, h1
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmax h0, h0, h1
 ; CHECK-NEXT: ret
 entry:
   %vmax = tail call half @llvm.aarch64.neon.fmax.f16(half %a, half %b)
@@ -93,7 +102,8 @@
 
 define dso_local half @t_vminh_f16(half %a, half %b) {
 ; CHECK-LABEL: t_vminh_f16:
-; CHECK: fmin h0, h0, h1
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmin h0, h0, h1
 ; CHECK-NEXT: ret
 entry:
   %vmin = tail call half @llvm.aarch64.neon.fmin.f16(half %a, half %b)
@@ -102,7 +112,8 @@
 
 define dso_local half @t_vmulxh_f16(half %a, half %b) {
 ; CHECK-LABEL: t_vmulxh_f16:
-; CHECK: fmulx h0, h0, h1
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmulx h0, h0, h1
 ; CHECK-NEXT: ret
 entry:
   %vmulxh_f16 = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %b)
@@ -111,7 +122,8 @@
 
 define dso_local half @t_vrecpsh_f16(half %a, half %b) {
 ; CHECK-LABEL: t_vrecpsh_f16:
-; CHECK: frecps h0, h0, h1
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frecps h0, h0, h1
 ; CHECK-NEXT: ret
 entry:
   %vrecps = tail call half @llvm.aarch64.neon.frecps.f16(half %a, half %b)
@@ -120,9 +132,134 @@
 
 define dso_local half @t_vrsqrtsh_f16(half %a, half %b) {
 ; CHECK-LABEL: t_vrsqrtsh_f16:
-; CHECK: frsqrts h0, h0, h1
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frsqrts h0, h0, h1
 ; CHECK-NEXT: ret
 entry:
   %vrsqrtsh_f16 = tail call half @llvm.aarch64.neon.frsqrts.f16(half %a, half %b)
   ret half %vrsqrtsh_f16
 }
+
+
+define dso_local half @test_vcvth_n_f16_s16(i16 %a) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vcvth_n_f16_s16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sxth w8, w0
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: scvtf h0, s0, #1
+; CHECK-NEXT: ret
+entry:
+  %sext = sext i16 %a to i32
+  %fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %sext, i32 1)
+  ret half %fcvth_n
+}
+
+declare half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32, i32) #1
+declare half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64, i32) #1
+declare i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half, i32) #1
+declare i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f16(half, i32) #1
+declare half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32, i32) #1
+declare i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half, i32) #1
+
+define dso_local half @test_vcvth_n_f16_s32(i32 %a) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vcvth_n_f16_s32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: scvtf h0, s0, #1
+; CHECK-NEXT: ret
+entry:
+  %vcvth_n_f16_s32 = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 1)
+  ret half %vcvth_n_f16_s32
+}
+
+define dso_local half @test_vcvth_n_f16_s64(i64 %a) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vcvth_n_f16_s64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov d0, x0
+; CHECK-NEXT: scvtf h0, d0, #1
+; CHECK-NEXT: ret
+entry:
+  %vcvth_n_f16_s64 = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64 %a, i32 1)
+  ret half %vcvth_n_f16_s64
+}
+
+define dso_local i16 @test_vcvth_n_s16_f16(half %a) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vcvth_n_s16_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, h0, #1
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+  %fcvth_n = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 1)
+  %0 = trunc i32 %fcvth_n to i16
+  ret i16 %0
+}
+
+define dso_local i32 @test_vcvth_n_s32_f16(half %a) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vcvth_n_s32_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, h0, #1
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+  %vcvth_n_s32_f16 = tail call i32 @llvm.aarch64.neon.vcvtfp2fxs.i32.f16(half %a, i32 1)
+  ret i32 %vcvth_n_s32_f16
+}
+
+define dso_local i64 @test_vcvth_n_s64_f16(half %a) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vcvth_n_s64_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs d0, h0, #1
+; CHECK-NEXT: fmov x0, d0
+; CHECK-NEXT: ret
+entry:
+  %vcvth_n_s64_f16 = tail call i64 @llvm.aarch64.neon.vcvtfp2fxs.i64.f16(half %a, i32 1)
+  ret i64 %vcvth_n_s64_f16
+}
+
+define dso_local half @test_vcvth_n_f16_u16(i16 %a) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vcvth_n_f16_u16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: and w8, w0, #0xffff
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ucvtf h0, s0, #1
+; CHECK-NEXT: ret
+entry:
+  %0 = zext i16 %a to i32
+  %fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %0, i32 1)
+  ret half %fcvth_n
+}
+
+define dso_local half @test_vcvth_n_f16_u32(i32 %a) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vcvth_n_f16_u32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ucvtf h0, s0, #1
+; CHECK-NEXT: ret
+entry:
+  %vcvth_n_f16_u32 = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %a, i32 1)
+  ret half %vcvth_n_f16_u32
+}
+
+define dso_local i16 @test_vcvth_n_u16_f16(half %a) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vcvth_n_u16_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu s0, h0, #1
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+  %fcvth_n = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 1)
+  %0 = trunc i32 %fcvth_n to i16
+  ret i16 %0
+}
+
+define dso_local i32 @test_vcvth_n_u32_f16(half %a) local_unnamed_addr #0 {
+; CHECK-LABEL: test_vcvth_n_u32_f16:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu s0, h0, #1
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+entry:
+  %vcvth_n_u32_f16 = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 1)
+  ret i32 %vcvth_n_u32_f16
+}
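
For context, the test functions above mirror the ACLE scalar FP16 fixed-point conversion intrinsics, so the new patterns are normally reached from C source rather than hand-written IR. The sketch below is illustrative only and not part of the patch: it assumes a compiler that provides the ACLE scalar FP16 intrinsics via <arm_fp16.h> (for example clang with -march=armv8.2-a+fp16), and the function name half_of_s32 is made up for the example.

/* Illustrative sketch, not part of the patch: how the
 * llvm.aarch64.neon.vcvtfxs2fp.f16.i32 intrinsic exercised by
 * test_vcvth_n_f16_s32 is typically reached from C.  Assumes the ACLE
 * scalar FP16 intrinsics from <arm_fp16.h> are available
 * (e.g. clang, -march=armv8.2-a+fp16). */
#include <arm_fp16.h>
#include <stdint.h>

float16_t half_of_s32(int32_t a) {
  /* Treat 'a' as a signed fixed-point value with one fractional bit,
   * i.e. compute a / 2.0 in half precision.  The second argument must be
   * a constant in the range 1..16.  With the patch above applied, this
   * should select the fixed-point form "scvtf h0, s0, #1", as checked in
   * test_vcvth_n_f16_s32. */
  return vcvth_n_f16_s32(a, 1);
}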