[ARM] Initial code generation for half-precision (f16) fadd/fsub.

What the hunks below do:
  * ARMCallingConv.td: add f16 to the type lists of the existing
    promote / bit-convert / S-register assignment rules.
  * ARMISelLowering.cpp: register f16 with HPRRegClass when the subtarget
    reports hasFullFP16(), and pick HPRRegClass for f16 formal arguments
    that arrive in registers.
  * ARMInstrVFP.td: VADDH/VSUBH now take HPR operands and carry fadd/fsub
    selection patterns (previously SPR operands and an empty pattern list).
  * ARMRegisterInfo.td: define the HPR register class (f16 over S0-S31).
  * ARMDisassembler.cpp: HPR decoders that forward to the SPR decoders.
  * fp16-instructions.ll: new test covering soft-float and +fullfp16 hard-float.

NOTE(review): DecodeHPRRegListOperand is a new static function, but no
operand referencing it is visible in this patch - confirm it is needed,
otherwise it will trigger -Wunused-function.

Index: lib/Target/ARM/ARMCallingConv.td
===================================================================
--- lib/Target/ARM/ARMCallingConv.td
+++ lib/Target/ARM/ARMCallingConv.td
@@ -21,7 +21,7 @@
   // Handles byval parameters.
   CCIfByVal<CCPassByVal<4, 4>>,
 
-  CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+  CCIfType<[i1, i8, i16, f16], CCPromoteToType<i32>>,
 
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -46,7 +46,7 @@
 
 def RetCC_ARM_APCS : CallingConv<[
   CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
-  CCIfType<[f32], CCBitConvertToType<i32>>,
+  CCIfType<[f16, f32], CCBitConvertToType<i32>>,
 
   // Pass SwiftSelf in a callee saved register.
   CCIfSwiftSelf<CCIfType<[i32], CCAssignToReg<[R10]>>>,
@@ -74,7 +74,7 @@
 
   CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
-  CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+  CCIfType<[f16, f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
                                  S9, S10, S11, S12, S13, S14, S15]>>,
 
   // CPRCs may be allocated to co-processor registers or the stack - they
@@ -93,7 +93,7 @@
 
   CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
-  CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+  CCIfType<[f16, f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
                                  S9, S10, S11, S12, S13, S14, S15]>>,
   CCDelegateTo<RetCC_ARM_APCS>
 ]>;
@@ -109,7 +109,7 @@
 
   CCIfType<[v2f64], CCAssignToReg<[Q4, Q5]>>,
   CCIfType<[f64], CCAssignToReg<[D8, D9, D10, D11]>>,
-  CCIfType<[f32], CCAssignToReg<[S16, S17, S18, S19, S20, S21, S22, S23]>>,
+  CCIfType<[f16, f32], CCAssignToReg<[S16, S17, S18, S19, S20, S21, S22, S23]>>,
 
   // Promote i8/i16 arguments to i32.
   CCIfType<[i8, i16], CCPromoteToType<i32>>,
@@ -124,7 +124,7 @@
 
 def CC_ARM_AAPCS_Common : CallingConv<[
 
-  CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+  CCIfType<[i1, i8, i16, f16], CCPromoteToType<i32>>,
 
   // i64/f64 is passed in even pairs of GPRs
   // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register
@@ -214,8 +214,8 @@
 
   CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
-  CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
-                                 S9, S10, S11, S12, S13, S14, S15]>>,
+  CCIfType<[f16, f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+                                      S9, S10, S11, S12, S13, S14, S15]>>,
   CCDelegateTo<CC_ARM_AAPCS_Common>
 ]>;
 
@@ -232,7 +232,7 @@
 
   CCIfType<[v2f64], CCAssignToReg<[Q0, Q1, Q2, Q3]>>,
   CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>,
-  CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
+  CCIfType<[f16, f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7, S8,
                                  S9, S10, S11, S12, S13, S14, S15]>>,
   CCDelegateTo<RetCC_ARM_AAPCS_Common>
 ]>;
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -523,6 +523,9 @@
       !Subtarget->isThumb1Only()) {
     addRegisterClass(MVT::f32, &ARM::SPRRegClass);
     addRegisterClass(MVT::f64, &ARM::DPRRegClass);
+
+    if (Subtarget->hasFullFP16())
+      addRegisterClass(MVT::f16, &ARM::HPRRegClass);
   }
 
   for (MVT VT : MVT::vector_valuetypes()) {
@@ -3698,7 +3701,9 @@
       } else {
         const TargetRegisterClass *RC;
 
-        if (RegVT == MVT::f32)
+        if (RegVT == MVT::f16)
+          RC = &ARM::HPRRegClass;
+        else if (RegVT == MVT::f32)
           RC = &ARM::SPRRegClass;
         else if (RegVT == MVT::f64)
           RC = &ARM::DPRRegClass;
Index: lib/Target/ARM/ARMInstrVFP.td
===================================================================
--- lib/Target/ARM/ARMInstrVFP.td
+++ lib/Target/ARM/ARMInstrVFP.td
@@ -355,9 +355,9 @@
 
 let TwoOperandAliasConstraint = "$Sn = $Sd" in
 def VADDH  : AHbI<0b11100, 0b11, 0, 0,
-                  (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+                  (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
                   IIC_fpALU16, "vadd", ".f16\t$Sd, $Sn, $Sm",
-                  []>,
+                  [(set HPR:$Sd, (fadd HPR:$Sn, HPR:$Sm))]>,
              Sched<[WriteFPALU32]>;
 
 let TwoOperandAliasConstraint = "$Dn = $Dd" in
@@ -380,9 +380,9 @@
 
 let TwoOperandAliasConstraint = "$Sn = $Sd" in
 def VSUBH  : AHbI<0b11100, 0b11, 1, 0,
-                  (outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm),
+                  (outs HPR:$Sd), (ins HPR:$Sn, HPR:$Sm),
                   IIC_fpALU16, "vsub", ".f16\t$Sd, $Sn, $Sm",
-                  []>,
+                  [(set HPR:$Sd, (fsub HPR:$Sn, HPR:$Sm))]>,
              Sched<[WriteFPALU32]>;
 
 let TwoOperandAliasConstraint = "$Dn = $Dd" in
Index: lib/Target/ARM/ARMRegisterInfo.td
===================================================================
--- lib/Target/ARM/ARMRegisterInfo.td
+++ lib/Target/ARM/ARMRegisterInfo.td
@@ -296,6 +296,17 @@
   }];
 }
 
+def HPR : RegisterClass<"ARM", [f16], 32, (sequence "S%u", 0, 31)> {
+  let AltOrders = [(add (decimate SPR, 2), SPR),
+                   (add (decimate SPR, 4),
+                        (decimate SPR, 2),
+                        (decimate (rotl SPR, 1), 4),
+                        (decimate (rotl SPR, 1), 2))];
+  let AltOrderSelect = [{
+    return 1 + MF.getSubtarget<ARMSubtarget>().useStride4VFPs(MF);
+  }];
+}
+
 // Subset of SPR which can be used as a source of NEON scalars for 16-bit
 // operations
 def SPR_8 : RegisterClass<"ARM", [f32], 32, (sequence "S%u", 0, 15)>;
Index: lib/Target/ARM/Disassembler/ARMDisassembler.cpp
===================================================================
--- lib/Target/ARM/Disassembler/ARMDisassembler.cpp
+++ lib/Target/ARM/Disassembler/ARMDisassembler.cpp
@@ -156,6 +156,8 @@
                                    uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeGPRPairRegisterClass(MCInst &Inst, unsigned RegNo,
                                    uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeHPRRegisterClass(MCInst &Inst, unsigned RegNo,
+                                   uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeSPRRegisterClass(MCInst &Inst, unsigned RegNo,
                                    uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeDPRRegisterClass(MCInst &Inst, unsigned RegNo,
@@ -180,6 +182,8 @@
                                uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeRegListOperand(MCInst &Inst, unsigned Val,
                                uint64_t Address, const void *Decoder);
+static DecodeStatus DecodeHPRRegListOperand(MCInst &Inst, unsigned Val,
+                               uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeSPRRegListOperand(MCInst &Inst, unsigned Val,
                                uint64_t Address, const void *Decoder);
 static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
@@ -988,6 +992,11 @@
   return MCDisassembler::Success;
 }
 
+static DecodeStatus DecodeHPRRegisterClass(MCInst &Inst, unsigned RegNo,
+                                   uint64_t Address, const void *Decoder) {
+  return DecodeSPRRegisterClass(Inst, RegNo, Address, Decoder);
+}
+
 static const uint16_t DPRDecoderTable[] = {
      ARM::D0,  ARM::D1,  ARM::D2,  ARM::D3,
      ARM::D4,  ARM::D5,  ARM::D6,  ARM::D7,
@@ -1245,6 +1254,11 @@
   return S;
 }
 
+static DecodeStatus DecodeHPRRegListOperand(MCInst &Inst, unsigned Val,
+                                 uint64_t Address, const void *Decoder) {
+  return DecodeSPRRegListOperand(Inst, Val, Address, Decoder);
+}
+
 static DecodeStatus DecodeDPRRegListOperand(MCInst &Inst, unsigned Val,
                                  uint64_t Address, const void *Decoder) {
   DecodeStatus S = MCDisassembler::Success;
Index: test/CodeGen/ARM/fp16-instructions.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/fp16-instructions.ll
@@ -0,0 +1,35 @@
+; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft | FileCheck %s --check-prefix=CHECK-SOFT
+; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+neon,+fullfp16 -float-abi=hard | FileCheck %s --check-prefix=CHECK-FP16
+
+define half @Sub(half %a, half %b) local_unnamed_addr {
+entry:
+;CHECK-FP16-LABEL: Sub:
+;CHECK-FP16: vsub.f16 s0, s0, s1
+;CHECK-FP16-NEXT: mov pc, lr
+
+;CHECK-SOFT-LABEL: Sub:
+;CHECK-SOFT: bl __aeabi_h2f
+;CHECK-SOFT: bl __aeabi_h2f
+;CHECK-SOFT: bl __aeabi_fsub
+;CHECK-SOFT: bl __aeabi_f2h
+
+  %sub = fsub half %a, %b
+  ret half %sub
+}
+
+
+define half @Add(half %a, half %b) local_unnamed_addr {
+entry:
+;CHECK-FP16-LABEL: Add:
+;CHECK-FP16: vadd.f16 s0, s0, s1
+;CHECK-FP16-NEXT: mov pc, lr
+
+;CHECK-SOFT-LABEL: Add:
+;CHECK-SOFT: bl __aeabi_h2f
+;CHECK-SOFT: bl __aeabi_h2f
+;CHECK-SOFT: bl __aeabi_fadd
+;CHECK-SOFT: bl __aeabi_f2h
+
+  %add = fadd half %a, %b
+  ret half %add
+}