Index: lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -70,9 +70,13 @@
     case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;
     case ISD::BITCAST:     R = SoftenFloatRes_BITCAST(N, ResNo); break;
     case ISD::BUILD_PAIR:  R = SoftenFloatRes_BUILD_PAIR(N); break;
+    case ISD::BUILD_VECTOR:
+      R = SoftenFloatRes_BUILD_VECTOR(N); break;
     case ISD::ConstantFP:  R = SoftenFloatRes_ConstantFP(N, ResNo); break;
     case ISD::EXTRACT_VECTOR_ELT:
       R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break;
+    case ISD::INSERT_VECTOR_ELT:
+      R = SoftenFloatRes_INSERT_VECTOR_ELT(N); break;
     case ISD::FABS:        R = SoftenFloatRes_FABS(N, ResNo); break;
     case ISD::FMINNUM:     R = SoftenFloatRes_FMINNUM(N); break;
     case ISD::FMAXNUM:     R = SoftenFloatRes_FMAXNUM(N); break;
@@ -144,6 +148,21 @@
                      BitConvertToInteger(N->getOperand(1)));
 }
 
+/// Soften the result of a BUILD_VECTOR: bit-convert every operand to an
+/// integer and rebuild the vector using the type the target transforms the
+/// original result type to.
+SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_VECTOR(SDNode *N) {
+  SmallVector<SDValue, 8> ConvertedValues;
+  llvm::transform(
+      N->op_values(), std::back_inserter(ConvertedValues),
+      [this](const SDValue &Val) { return BitConvertToInteger(Val); });
+
+  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
+                     TLI.getTypeToTransformTo(*DAG.getContext(),
+                                              N->getValueType(0)),
+                     ConvertedValues);
+}
+
 SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) {
   // When LegalInHWReg, we can load better from the constant pool.
   if (isLegalInHWReg(N->getValueType(ResNo)))
@@ -181,6 +200,16 @@
                      NewOp, N->getOperand(1));
 }
 
+/// Soften the result of an INSERT_VECTOR_ELT: bit-convert the source vector
+/// and the inserted element to integers, then re-emit the insertion on the
+/// integer vector with the original index operand.
+SDValue DAGTypeLegalizer::SoftenFloatRes_INSERT_VECTOR_ELT(SDNode *N) {
+  SDValue NewVec = BitConvertVectorToIntegerVector(N->getOperand(0));
+  SDValue NewElem = BitConvertToInteger(N->getOperand(1));
+  return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), NewVec.getValueType(),
+                     NewVec, NewElem, N->getOperand(2));
+}
+
 SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N, unsigned ResNo) {
   // When LegalInHWReg, FABS can be implemented as native bitwise operations.
   if (isLegalInHWReg(N->getValueType(ResNo)))
Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -470,8 +470,10 @@
   SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
+  SDValue SoftenFloatRes_BUILD_VECTOR(SDNode *N);
   SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo);
+  SDValue SoftenFloatRes_INSERT_VECTOR_ELT(SDNode *N);
   SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo);
   SDValue SoftenFloatRes_FMINNUM(SDNode *N);
   SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
Index: lib/CodeGen/TargetLoweringBase.cpp
===================================================================
--- lib/CodeGen/TargetLoweringBase.cpp
+++ lib/CodeGen/TargetLoweringBase.cpp
@@ -1165,6 +1165,24 @@
     bool IsLegalWiderType = false;
     LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT);
     switch (PreferredAction) {
+    case TypeSoftenFloat: {
+      // Soften to the integer vector of the same element width and count if
+      // that type is legal; otherwise fall through to TypePromoteInteger.
+      MVT SoftEltVT = MVT::getIntegerVT(EltVT.getSizeInBits());
+      MVT SoftVT = MVT::getVectorVT(SoftEltVT, NElts);
+      if (isTypeLegal(SoftVT)) {
+        unsigned ToInd = (unsigned)SoftVT.SimpleTy;
+        assert(ToInd < i && "FP types precede integer types in MVT?");
+        TransformToType[i] = SoftVT;
+        RegisterTypeForVT[i] = RegisterTypeForVT[ToInd];
+        NumRegistersForVT[i] = NumRegistersForVT[ToInd];
+        ValueTypeActions.setTypeAction(VT, TypeSoftenFloat);
+        break;
+      }
+
+      LLVM_FALLTHROUGH;
+    }
+
     case TypePromoteInteger:
       // Try to promote the elements of integer vectors. If no legal
       // promotion was found, fall through to the widen-vector method.
Index: lib/Target/ARM/ARMISelLowering.h
===================================================================
--- lib/Target/ARM/ARMISelLowering.h
+++ lib/Target/ARM/ARMISelLowering.h
@@ -497,6 +497,9 @@
     bool functionArgumentNeedsConsecutiveRegisters(
         Type *Ty, CallingConv::ID CallConv, bool isVarArg) const override;
 
+    TargetLoweringBase::LegalizeTypeAction
+    getPreferredVectorAction(EVT VT) const override;
+
     /// If a physical register, this returns the register that receives the
     /// exception address on entry to an EH pad.
     unsigned
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -14846,6 +14846,15 @@
   return IsHA || IsIntArray;
 }
 
+/// Prefer softening for floating-point types whose scalar size is 16 bits;
+/// every other type keeps the action chosen by TargetLoweringBase.
+TargetLoweringBase::LegalizeTypeAction
+ARMTargetLowering::getPreferredVectorAction(EVT VT) const {
+  if (VT.isFloatingPoint() && VT.getScalarSizeInBits() == 16)
+    return TargetLoweringBase::LegalizeTypeAction::TypeSoftenFloat;
+  return TargetLoweringBase::getPreferredVectorAction(VT);
+}
+
 unsigned ARMTargetLowering::getExceptionPointerRegister(
     const Constant *PersonalityFn) const {
   // Platforms which do not use SjLj EH may return values in these registers
Index: test/CodeGen/ARM/fp16-promote.ll
===================================================================
--- test/CodeGen/ARM/fp16-promote.ll
+++ test/CodeGen/ARM/fp16-promote.ll
@@ -820,15 +820,15 @@
 ; CHECK-ALL-LABEL: test_insertelement:
 ; CHECK-ALL: sub sp, sp, #8
 
-; CHECK-VFP: and
-; CHECK-VFP: mov
-; CHECK-VFP: ldrd
-; CHECK-VFP: orr
-; CHECK-VFP: ldrh
-; CHECK-VFP: stm
-; CHECK-VFP: strh
-; CHECK-VFP: ldm
-; CHECK-VFP: stm
+; CHECK-VFP: and
+; CHECK-VFP: mov
+; CHECK-VFP: vldr
+; CHECK-VFP: orr
+; CHECK-VFP: ldrh
+; CHECK-VFP: vstr
+; CHECK-VFP: strh
+; CHECK-VFP: vldr
+; CHECK-VFP: vstr
 
 ; CHECK-NOVFP: ldrh
 ; CHECK-NOVFP: ldrh
@@ -860,15 +860,15 @@
 }
 
 ; CHECK-ALL-LABEL: test_extractelement:
-; CHECK-VFP: push {{{.*}}, lr}
 ; CHECK-VFP: sub sp, sp, #8
-; CHECK-VFP: ldrd
+; CHECK-VFP: vldr
+; CHECK-VFP: and
 ; CHECK-VFP: mov
 ; CHECK-VFP: orr
+; CHECK-VFP: vstr
 ; CHECK-VFP: ldrh
 ; CHECK-VFP: strh
 ; CHECK-VFP: add sp, sp, #8
-; CHECK-VFP: pop {{{.*}}, pc}
 ; CHECK-NOVFP: ldrh
 ; CHECK-NOVFP: strh
 ; CHECK-NOVFP: ldrh
Index: test/CodeGen/ARM/fp16-soften.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/fp16-soften.ll
@@ -0,0 +1,21 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7-none--eabi"
+
+@v = local_unnamed_addr global <4 x half> zeroinitializer, align 8
+
+declare void @callee(<4 x half>) #0
+
+; CHECK-LABEL: test_soften:
+; CHECK: vldr [[DREG:d[0-9]+]], {{\[r[0-9]+]}}
+; CHECK-NEXT: vmov r0, r1, [[DREG]]
+; CHECK-NEXT: b callee
+define void @test_soften() #0 {
+entry:
+  %0 = load <4 x half>, <4 x half>* @v, align 8
+  tail call void (<4 x half>) @callee(<4 x half> %0)
+  ret void
+}
+
+attributes #0 = { nounwind }
Index: test/CodeGen/ARM/fp16-v3.ll
===================================================================
--- test/CodeGen/ARM/fp16-v3.ll
+++ test/CodeGen/ARM/fp16-v3.ll
@@ -11,10 +11,8 @@
 ; CHECK: vadd.f32 [[SREG5:s[0-9]+]], [[SREG4]], [[SREG1]]
 ; CHECK-NEXT: vcvtb.f16.f32 [[SREG6:s[0-9]+]], [[SREG5]]
 ; CHECK-NEXT: vmov [[RREG1:r[0-9]+]], [[SREG6]]
-; CHECK-DAG: uxth [[RREG2:r[0-9]+]], [[RREG1]]
-; CHECK-DAG: pkhbt [[RREG3:r[0-9]+]], [[RREG1]], [[RREG1]], lsl #16
 ; CHECK-DAG: strh [[RREG1]], [r0, #4]
-; CHECK-DAG: vmov [[DREG:d[0-9]+]], [[RREG3]], [[RREG2]]
+; CHECK-DAG: vdup.16 [[DREG:d[0-9]+]], [[RREG1]]
 ; CHECK-DAG: vst1.32 {[[DREG]][0]}, [r0:32]
 ; CHECK-NEXT: bx lr
 define void @test_vec3(<3 x half>* %arr, i32 %i) #0 {
@@ -30,9 +28,11 @@
 ; CHECK-LABEL: test_bitcast:
 ; CHECK: vcvtb.f16.f32
 ; CHECK: vcvtb.f16.f32
+; CHECK: vmov.16
 ; CHECK: vcvtb.f16.f32
-; CHECK: pkhbt
-; CHECK: uxth
+; CHECK: vmov.16
+; CHECK: vst1.32
+; CHECK: strh
 define void @test_bitcast(<3 x half> %inp, <3 x i16>* %arr) #0 {
   %bc = bitcast <3 x half> %inp to <3 x i16>
   store <3 x i16> %bc, <3 x i16>* %arr, align 8
Index: test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
===================================================================
--- test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
+++ test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
@@ -121,12 +121,12 @@
   br label %for.body
 
 ; VF_4-LABEL: Checking a loop in "half_factor_2"
-; VF_4: Found an estimated cost of 40 for VF 4 For instruction: %tmp2 = load half, half* %tmp0, align 2
+; VF_4: Found an estimated cost of 33 for VF 4 For instruction: %tmp2 = load half, half* %tmp0, align 2
 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, half* %tmp1, align 2
 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp0, align 2
 ; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2
 ; VF_8-LABEL: Checking a loop in "half_factor_2"
-; VF_8: Found an estimated cost of 80 for VF 8 For instruction: %tmp2 = load half, half* %tmp0, align 2
+; VF_8: Found an estimated cost of 66 for VF 8 For instruction: %tmp2 = load half, half* %tmp0, align 2
 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load half, half* %tmp1, align 2
 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp0, align 2
 ; VF_8-NEXT: Found an estimated cost of 64 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2