Index: lib/Target/AArch64/AArch64InstrFormats.td
===================================================================
--- lib/Target/AArch64/AArch64InstrFormats.td
+++ lib/Target/AArch64/AArch64InstrFormats.td
@@ -7923,27 +7923,6 @@ multiclass SIMDFPScalarRShift<bit U, bits<5> opc, string asm> {
   let Predicates = [HasNEON, HasFullFP16] in {
-  def HSr : BaseSIMDScalarShift {
-    let Inst{19-16} = imm{3-0};
-    let Inst{23-22} = 0b11;
-  }
-  def SHr : BaseSIMDScalarShift {
-    let Inst{19-16} = imm{3-0};
-    let Inst{22-21} = 0b01;
-  }
-  def HDr : BaseSIMDScalarShift {
-    let Inst{21-16} = imm{5-0};
-    let Inst{23-22} = 0b11;
-  }
-  def DHr : BaseSIMDScalarShift {
-    let Inst{21-16} = imm{5-0};
-    let Inst{23-22} = 0b01;
-    let Inst{31} = 1;
-  }
   def h : BaseSIMDScalarShift {
     let Inst{19-16} = imm{3-0};
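The defs removed above modelled mixed-width scalar fixed-point converts (an FP16 result from a 32- or 64-bit source register, and the reverse) that do not exist in the architecture: the SIMD scalar shift-by-immediate converts only come in same-width forms (h to h, s to s, d to d). A minimal sketch of what does and does not assemble, assuming a toolchain targeting armv8.2-a+fp16 (the file and function names are illustrative only):

/* scvtf_forms.c - build with: cc -c -march=armv8.2-a+fp16 scvtf_forms.c */
void scvtf_forms(void) {
  /* Same-width forms: real encodings, these assemble cleanly. */
  __asm__ volatile("scvtf h0, h0, #1");
  __asm__ volatile("scvtf s1, s1, #16");
  __asm__ volatile("scvtf d2, d2, #32");
  /* A mixed-width form such as "scvtf h0, s0, #1" is rejected by the
   * assembler; the HSr/SHr/HDr/DHr defs deleted above described exactly
   * such nonexistent instructions. */
}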
Index: lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.td
+++ lib/Target/AArch64/AArch64InstrInfo.td
@@ -4955,16 +4955,6 @@
 def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
                                               vecshiftR64:$imm)),
           (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
-def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
-          (FCVTZSHDr (i64 FPR64:$Rn), vecshiftR32:$imm)>;
-def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu FPR16:$Rn, vecshiftR32:$imm)),
-          (FCVTZUSHr FPR16:$Rn, vecshiftR32:$imm)>;
-def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs FPR16:$Rn, vecshiftR32:$imm)),
-          (FCVTZSSHr FPR16:$Rn, vecshiftR32:$imm)>;
-def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
-          (FCVTZSDHr (f16 FPR16:$Rn), vecshiftR64:$imm)>;
-def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
-          (UCVTFHSr FPR32:$Rn, vecshiftR16:$imm)>;
 def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
           (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
 def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
@@ -4972,10 +4962,6 @@
 def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
                                               vecshiftR64:$imm)),
           (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
-def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
-          (SCVTFHSr FPR32:$Rn, vecshiftR16:$imm)>;
-def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR16:$imm)),
-          (SCVTFHSr FPR32:$Rn, vecshiftR16:$imm)>;
 def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
           (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
 def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
@@ -4984,6 +4970,43 @@
 def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
           (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
 
+// Patterns for FP16 Intrinsics - requires a reg copy to/from, as i16s are not supported.
+
+def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
+          (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
+def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
+          (SCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
+def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
+          (SCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
+def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
+            (and FPR32:$Rn, (i32 65535)),
+            vecshiftR16:$imm)),
+          (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
+def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
+          (UCVTFh (EXTRACT_SUBREG FPR32:$Rn, hsub), vecshiftR16:$imm)>;
+def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
+          (UCVTFh (EXTRACT_SUBREG FPR64:$Rn, hsub), vecshiftR16:$imm)>;
+def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
+          (i32 (INSERT_SUBREG
+              (i32 (IMPLICIT_DEF)),
+              (FCVTZSh FPR16:$Rn, vecshiftR32:$imm),
+              hsub))>;
+def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
+          (i64 (INSERT_SUBREG
+              (i64 (IMPLICIT_DEF)),
+              (FCVTZSh FPR16:$Rn, vecshiftR64:$imm),
+              hsub))>;
+def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)),
+          (i32 (INSERT_SUBREG
+              (i32 (IMPLICIT_DEF)),
+              (FCVTZUh FPR16:$Rn, vecshiftR32:$imm),
+              hsub))>;
+def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
+          (i64 (INSERT_SUBREG
+              (i64 (IMPLICIT_DEF)),
+              (FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
+              hsub))>;
+
 defm SHL      : SIMDScalarLShiftD<   0, 0b01010, "shl", AArch64vshl>;
 defm SLI      : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
 defm SQRSHRN  : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
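The replacement patterns keep the conversion in the FPR bank: on the way in, the integer value is moved to an FPR and its low 16 bits are read as the hsub sub-register (EXTRACT_SUBREG); on the way out, the FP16 result is placed into the low 16 bits of an otherwise undefined wider register (INSERT_SUBREG of IMPLICIT_DEF) before being moved back to a GPR. At the source level these patterns are reached through the ACLE scalar FP16 intrinsics; a small sketch, assuming clang with -march=armv8.2-a+fp16 (the expected instructions are taken from the updated tests below):

/* vcvt_n.c - ACLE scalar FP16 fixed-point converts, from arm_fp16.h. */
#include <arm_fp16.h>
#include <stdint.h>

float16_t fixed16_to_half(int32_t a) {
  /* Selects SCVTFh via the (i32 FPR32:$Rn) pattern above:
   *   fmov  s0, w0
   *   scvtf h0, h0, #16
   */
  return vcvth_n_f16_s32(a, 16);
}

int32_t half_to_fixed1(float16_t a) {
  /* Selects FCVTZSh wrapped in INSERT_SUBREG:
   *   fcvtzs h0, h0, #1
   *   fmov   w0, s0
   */
  return vcvth_n_s32_f16(a, 1);
}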
Index: lib/Target/AArch64/AArch64RegisterInfo.h
===================================================================
--- lib/Target/AArch64/AArch64RegisterInfo.h
+++ lib/Target/AArch64/AArch64RegisterInfo.h
@@ -46,6 +46,10 @@
     return 5;
   }
 
+  const TargetRegisterClass *
+  getSubClassWithSubReg(const TargetRegisterClass *RC,
+                        unsigned Idx) const override;
+
   // Calls involved in thread-local variable lookup save more registers than
   // normal calls, so they need a different mask to represent this.
   const uint32_t *getTLSCallPreservedMask() const;
Index: lib/Target/AArch64/AArch64RegisterInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -72,6 +72,19 @@
   return nullptr;
 }
 
+const TargetRegisterClass *
+AArch64RegisterInfo::getSubClassWithSubReg(const TargetRegisterClass *RC,
+                                           unsigned Idx) const {
+  // Edge case for the GPR/FPR register classes.
+  if (RC == &AArch64::GPR32allRegClass && Idx == AArch64::hsub)
+    return &AArch64::FPR32RegClass;
+  else if (RC == &AArch64::GPR64allRegClass && Idx == AArch64::hsub)
+    return &AArch64::FPR64RegClass;
+
+  // Forward to TableGen's default version.
+  return AArch64GenRegisterInfo::getSubClassWithSubReg(RC, Idx);
+}
+
 const uint32_t *
 AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                           CallingConv::ID CC) const {
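The register-info override exists because the new patterns take an hsub sub-register out of values that the selector initially assigns to GPR classes. TargetRegisterInfo::getSubClassWithSubReg(RC, Idx) is documented to return the largest legal sub-class of RC that supports the sub-register index Idx; GPR32all and GPR64all contain no registers with an hsub slice, so the override instead hands back the same-width FPR class, which is where the value must live for the hsub access to be possible. A toy model of that contract with stand-in types (not LLVM code; the names only mirror the patch for illustration):

#include <assert.h>

/* Stand-ins for the TableGen-generated register classes and indices. */
enum reg_class { GPR32all, GPR64all, FPR32, FPR64, NO_CLASS };
enum subreg_idx { hsub, ssub };

/* Mirrors the override above: the GPR classes own no hsub sub-register,
 * so the class supporting hsub lives in the FPR bank. */
enum reg_class get_subclass_with_subreg(enum reg_class rc, enum subreg_idx idx) {
  if (rc == GPR32all && idx == hsub)
    return FPR32;
  if (rc == GPR64all && idx == hsub)
    return FPR64;
  return NO_CLASS; /* stand-in for the TableGen default lookup */
}

int main(void) {
  assert(get_subclass_with_subreg(GPR32all, hsub) == FPR32);
  assert(get_subclass_with_subreg(GPR64all, hsub) == FPR64);
  return 0;
}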
Index: test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll
===================================================================
--- test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll
+++ test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll
@@ -136,9 +136,8 @@
 
 define dso_local half @test_vcvth_n_f16_s16_1(i16 %a) {
 ; CHECK-LABEL: test_vcvth_n_f16_s16_1:
-; CHECK: sxth w[[wReg:[0-9]+]], w0
-; CHECK-NEXT: fmov s0, w[[wReg:[0-9]+]]
-; CHECK-NEXT: scvtf h0, s0, #1
+; CHECK: fmov s0, w[[wReg:[0-9]+]]
+; CHECK-NEXT: scvtf h0, h0, #1
 ; CHECK-NEXT: ret
 entry:
   %sext = sext i16 %a to i32
@@ -148,9 +147,8 @@
 
 define dso_local half @test_vcvth_n_f16_s16_16(i16 %a) {
 ; CHECK-LABEL: test_vcvth_n_f16_s16_16:
-; CHECK: sxth w[[wReg:[0-9]+]], w0
-; CHECK-NEXT: fmov s0, w[[wReg:[0-9]+]]
-; CHECK-NEXT: scvtf h0, s0, #16
+; CHECK: fmov s0, w[[wReg:[0-9]+]]
+; CHECK-NEXT: scvtf h0, h0, #16
 ; CHECK-NEXT: ret
 entry:
   %sext = sext i16 %a to i32
@@ -161,7 +159,7 @@
 define dso_local half @test_vcvth_n_f16_s32_1(i32 %a) {
 ; CHECK-LABEL: test_vcvth_n_f16_s32_1:
 ; CHECK: fmov s0, w0
-; CHECK-NEXT: scvtf h0, s0, #1
+; CHECK-NEXT: scvtf h0, h0, #1
 ; CHECK-NEXT: ret
 entry:
   %vcvth_n_f16_s32 = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 1)
@@ -171,7 +169,7 @@
 define dso_local half @test_vcvth_n_f16_s32_16(i32 %a) {
 ; CHECK-LABEL: test_vcvth_n_f16_s32_16:
 ; CHECK: fmov s0, w0
-; CHECK-NEXT: scvtf h0, s0, #16
+; CHECK-NEXT: scvtf h0, h0, #16
 ; CHECK-NEXT: ret
 entry:
   %vcvth_n_f16_s32 = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %a, i32 16)
@@ -181,7 +179,7 @@
 define dso_local half @test_vcvth_n_f16_s64_1(i64 %a) {
 ; CHECK-LABEL: test_vcvth_n_f16_s64_1:
 ; CHECK: fmov d0, x0
-; CHECK-NEXT: fcvtzs h0, d0, #1
+; CHECK-NEXT: scvtf h0, h0, #1
 ; CHECK-NEXT: ret
 entry:
   %vcvth_n_f16_s64 = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64 %a, i32 1)
@@ -191,7 +189,7 @@
 define dso_local half @test_vcvth_n_f16_s64_16(i64 %a) {
 ; CHECK-LABEL: test_vcvth_n_f16_s64_16:
 ; CHECK: fmov d0, x0
-; CHECK-NEXT: fcvtzs h0, d0, #16
+; CHECK-NEXT: scvtf h0, h0, #16
 ; CHECK-NEXT: ret
 entry:
   %vcvth_n_f16_s64 = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i64(i64 %a, i32 16)
@@ -200,7 +198,7 @@
 
 define dso_local i16 @test_vcvth_n_s16_f16_1(half %a) {
 ; CHECK-LABEL: test_vcvth_n_s16_f16_1:
-; CHECK: fcvtzs s0, h0, #1
+; CHECK: fcvtzs h0, h0, #1
 ; CHECK-NEXT: fmov w0, s0
 ; CHECK-NEXT: ret
 entry:
@@ -211,7 +209,7 @@
 
 define dso_local i16 @test_vcvth_n_s16_f16_16(half %a) {
 ; CHECK-LABEL: test_vcvth_n_s16_f16_16:
-; CHECK: fcvtzs s0, h0, #16
+; CHECK: fcvtzs h0, h0, #16
 ; CHECK-NEXT: fmov w0, s0
 ; CHECK-NEXT: ret
 entry:
@@ -222,7 +220,7 @@
 
 define dso_local i32 @test_vcvth_n_s32_f16_1(half %a) {
 ; CHECK-LABEL: test_vcvth_n_s32_f16_1:
-; CHECK: fcvtzs s0, h0, #1
+; CHECK: fcvtzs h0, h0, #1
 ; CHECK-NEXT: fmov w0, s0
 ; CHECK-NEXT: ret
 entry:
@@ -232,7 +230,7 @@
 
 define dso_local i32 @test_vcvth_n_s32_f16_16(half %a) {
 ; CHECK-LABEL: test_vcvth_n_s32_f16_16:
-; CHECK: fcvtzs s0, h0, #16
+; CHECK: fcvtzs h0, h0, #16
 ; CHECK-NEXT: fmov w0, s0
 ; CHECK-NEXT: ret
 entry:
@@ -242,7 +240,7 @@
 
 define dso_local i64 @test_vcvth_n_s64_f16_1(half %a) {
 ; CHECK-LABEL: test_vcvth_n_s64_f16_1:
-; CHECK: fcvtzs d0, h0, #1
+; CHECK: fcvtzs h0, h0, #1
 ; CHECK-NEXT: fmov x0, d0
 ; CHECK-NEXT: ret
 entry:
@@ -252,7 +250,7 @@
 
 define dso_local i64 @test_vcvth_n_s64_f16_32(half %a) {
 ; CHECK-LABEL: test_vcvth_n_s64_f16_32:
-; CHECK: fcvtzs d0, h0, #32
+; CHECK: fcvtzs h0, h0, #32
 ; CHECK-NEXT: fmov x0, d0
 ; CHECK-NEXT: ret
 entry:
@@ -262,9 +260,7 @@
 
 define dso_local half @test_vcvth_n_f16_u16_1(i16 %a) {
 ; CHECK-LABEL: test_vcvth_n_f16_u16_1:
-; CHECK: and w[[wReg:[0-9]+]], w0, #0xffff
-; CHECK-NEXT: fmov s0, w[[wReg:[0-9]+]]
-; CHECK-NEXT: ucvtf h0, s0, #1
+; CHECK: ucvtf h0, h0, #1
 ; CHECK-NEXT: ret
 entry:
   %0 = zext i16 %a to i32
@@ -274,9 +270,7 @@
 
 define dso_local half @test_vcvth_n_f16_u16_16(i16 %a) {
 ; CHECK-LABEL: test_vcvth_n_f16_u16_16:
-; CHECK: and w[[wReg:[0-9]+]], w0, #0xffff
-; CHECK-NEXT: fmov s0, w[[wReg:[0-9]+]]
-; CHECK-NEXT: ucvtf h0, s0, #16
+; CHECK: ucvtf h0, h0, #16
 ; CHECK-NEXT: ret
 entry:
   %0 = zext i16 %a to i32
@@ -287,7 +281,7 @@
 define dso_local half @test_vcvth_n_f16_u32_1(i32 %a) {
 ; CHECK-LABEL: test_vcvth_n_f16_u32_1:
 ; CHECK: fmov s0, w0
-; CHECK-NEXT: ucvtf h0, s0, #1
+; CHECK-NEXT: ucvtf h0, h0, #1
 ; CHECK-NEXT: ret
 entry:
   %vcvth_n_f16_u32 = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %a, i32 1)
@@ -296,8 +290,7 @@
 
 define dso_local half @test_vcvth_n_f16_u32_16(i32 %a) {
 ; CHECK-LABEL: test_vcvth_n_f16_u32_16:
-; CHECK: fmov s0, w0
-; CHECK-NEXT: ucvtf h0, s0, #16
+; CHECK: ucvtf h0, h0, #16
 ; CHECK-NEXT: ret
 entry:
   %vcvth_n_f16_u32 = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %a, i32 16)
@@ -306,7 +299,7 @@
 
 define dso_local i16 @test_vcvth_n_u16_f16_1(half %a) {
 ; CHECK-LABEL: test_vcvth_n_u16_f16_1:
-; CHECK: fcvtzu s0, h0, #1
+; CHECK: fcvtzu h0, h0, #1
 ; CHECK-NEXT: fmov w0, s0
 ; CHECK-NEXT: ret
 entry:
@@ -317,7 +310,7 @@
 
 define dso_local i16 @test_vcvth_n_u16_f16_16(half %a) {
 ; CHECK-LABEL: test_vcvth_n_u16_f16_16:
-; CHECK: fcvtzu s0, h0, #16
+; CHECK: fcvtzu h0, h0, #16
 ; CHECK-NEXT: fmov w0, s0
 ; CHECK-NEXT: ret
 entry:
@@ -328,7 +321,7 @@
 
 define dso_local i32 @test_vcvth_n_u32_f16_1(half %a) {
 ; CHECK-LABEL: test_vcvth_n_u32_f16_1:
-; CHECK: fcvtzu s0, h0, #1
+; CHECK: fcvtzu h0, h0, #1
 ; CHECK-NEXT: fmov w0, s0
 ; CHECK-NEXT: ret
 entry:
@@ -338,7 +331,7 @@
 
 define dso_local i32 @test_vcvth_n_u32_f16_16(half %a) {
 ; CHECK-LABEL: test_vcvth_n_u32_f16_16:
-; CHECK: fcvtzu s0, h0, #16
+; CHECK: fcvtzu h0, h0, #16
 ; CHECK-NEXT: fmov w0, s0
 ; CHECK-NEXT: ret
 entry: