Index: lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1767,6 +1767,10 @@
     case ISD::SELECT_CC: R = PromoteFloatOp_SELECT_CC(N, OpNo); break;
     case ISD::SETCC: R = PromoteFloatOp_SETCC(N, OpNo); break;
     case ISD::STORE: R = PromoteFloatOp_STORE(N, OpNo); break;
+    case ISD::BUILD_VECTOR: R = PromoteFloatOp_BUILD_VECTOR(N, OpNo); break;
+    case ISD::INSERT_VECTOR_ELT:
+      R = PromoteFloatOp_INSERT_VECTOR_ELT(N, OpNo);
+      break;
   }
 
   if (R.getNode())
@@ -1861,6 +1865,35 @@
                       ST->getMemOperand());
 }
 
+/// Float operand promotion for BUILD_VECTOR: bit-convert every operand to an
+/// integer, build the matching integer vector, and bitcast the result back to
+/// the node's result type. OpNo is unused: every operand is converted.
+SDValue DAGTypeLegalizer::PromoteFloatOp_BUILD_VECTOR(SDNode *N,
+                                                      unsigned OpNo) {
+  SmallVector<SDValue, 8> ConvertedValues;
+  llvm::transform(
+      N->op_values(), std::back_inserter(ConvertedValues),
+      [this](const SDValue &Val) { return BitConvertToInteger(Val); });
+
+  SDValue IntRes = DAG.getNode(
+      ISD::BUILD_VECTOR, SDLoc(N),
+      N->getValueType(0).changeVectorElementTypeToInteger(), ConvertedValues);
+  return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), IntRes);
+}
+
+/// Float operand promotion for INSERT_VECTOR_ELT: insert the bit-converted
+/// element into the integer view of the vector, then bitcast the result back
+/// to the node's result type.
+SDValue DAGTypeLegalizer::PromoteFloatOp_INSERT_VECTOR_ELT(SDNode *N,
+                                                           unsigned OpNo) {
+  SDValue IntVec = BitConvertVectorToIntegerVector(N->getOperand(0));
+  SDValue IntElem = BitConvertToInteger(N->getOperand(1));
+  SDValue IntRes =
+      DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), IntVec.getValueType(),
+                  IntVec, IntElem, N->getOperand(2));
+  return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), IntRes);
+}
+
 //===----------------------------------------------------------------------===//
 // Float Result Promotion
 //===----------------------------------------------------------------------===//
Index: lib/CodeGen/SelectionDAG/LegalizeTypes.h
===================================================================
--- lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -628,6 +628,8 @@
   SDValue PromoteFloatOp_STORE(SDNode *N, unsigned OpNo);
   SDValue PromoteFloatOp_SELECT_CC(SDNode *N, unsigned OpNo);
   SDValue PromoteFloatOp_SETCC(SDNode *N, unsigned OpNo);
+  SDValue PromoteFloatOp_BUILD_VECTOR(SDNode *N, unsigned OpNo);
+  SDValue PromoteFloatOp_INSERT_VECTOR_ELT(SDNode *N, unsigned OpNo);
 
   //===--------------------------------------------------------------------===//
   // Scalarization Support: LegalizeVectorTypes.cpp
Index: lib/Target/ARM/ARMISelLowering.h
===================================================================
--- lib/Target/ARM/ARMISelLowering.h
+++ lib/Target/ARM/ARMISelLowering.h
@@ -609,6 +609,17 @@
     void addTypeForNEON(MVT VT, MVT PromotedLdStVT, MVT PromotedBitwiseVT);
     void addDRTypeForNEON(MVT VT);
     void addQRTypeForNEON(MVT VT);
+    /// Mark the math-function style nodes (FSQRT, FSIN, FCOS, FPOW, FLOG,
+    /// FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR)
+    /// as Expand for the given FP type.
+    void setFPFunctionsExpand(MVT VT);
+    /// Mark FADD, FSUB, FMUL, FMA, FDIV, FREM, FCOPYSIGN, FGETSIGN, SETCC,
+    /// FNEG and FABS as Expand for the given FP type, then do the same for
+    /// everything covered by setFPFunctionsExpand.
+    /// NOTE(review): the per-type lists this helper replaces did not expand
+    /// SETCC for f64 or FGETSIGN for v2f64 - confirm the additions are
+    /// intended.
+    void setFPOperationsExpand(MVT VT);
     std::pair<SDValue, SDValue> getARMXALUOOp(SDValue Op, SelectionDAG &DAG, SDValue &ARMcc) const;
 
     using RegsToPassVector = SmallVector<std::pair<unsigned, SDValue>, 8>;
Index: lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- lib/Target/ARM/ARMISelLowering.cpp
+++ lib/Target/ARM/ARMISelLowering.cpp
@@ -220,6 +220,24 @@
   addTypeForNEON(VT, MVT::v2f64, MVT::v4i32);
 }
 
+void ARMTargetLowering::setFPFunctionsExpand(MVT VT) {
+  for (ISD::NodeType Op : { ISD::FSQRT, ISD::FSIN, ISD::FCOS,
+                            ISD::FPOW, ISD::FLOG, ISD::FLOG2,
+                            ISD::FLOG10, ISD::FEXP, ISD::FEXP2,
+                            ISD::FCEIL, ISD::FTRUNC, ISD::FRINT,
+                            ISD::FNEARBYINT, ISD::FFLOOR })
+    setOperationAction(Op, VT, Expand);
+}
+
+void ARMTargetLowering::setFPOperationsExpand(MVT VT) {
+  for (ISD::NodeType Op : { ISD::FADD, ISD::FSUB, ISD::FMUL,
+                            ISD::FMA, ISD::FDIV, ISD::FREM,
+                            ISD::FCOPYSIGN, ISD::FGETSIGN, ISD::SETCC,
+                            ISD::FNEG, ISD::FABS })
+    setOperationAction(Op, VT, Expand);
+  setFPFunctionsExpand(VT);
+}
+
ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM, const ARMSubtarget &STI) : TargetLowering(TM), Subtarget(&STI) { @@ -561,79 +579,35 @@ addQRTypeForNEON(MVT::v4i32); addQRTypeForNEON(MVT::v2i64); - if (Subtarget->hasFullFP16()) { - addQRTypeForNEON(MVT::v8f16); - addDRTypeForNEON(MVT::v4f16); + // Even if the target does not support FP16 operations we want to keep + // <4 x half> and <8 x half> legal, because they can still be used as + // storage types and need to be handled correctly when passed as function + // parameters (the calling convention requires them to be treated as + // containerized vectors). + addQRTypeForNEON(MVT::v8f16); + addDRTypeForNEON(MVT::v4f16); + if (!Subtarget->hasFullFP16()) { + setFPOperationsExpand(MVT::v8f16); + setFPOperationsExpand(MVT::v4f16); } // v2f64 is legal so that QR subregs can be extracted as f64 elements, but // neither Neon nor VFP support any arithmetic operations on it. // The same with v4f32. But keep in mind that vadd, vsub, vmul are natively // supported for v4f32. - setOperationAction(ISD::FADD, MVT::v2f64, Expand); - setOperationAction(ISD::FSUB, MVT::v2f64, Expand); - setOperationAction(ISD::FMUL, MVT::v2f64, Expand); - // FIXME: Code duplication: FDIV and FREM are expanded always, see - // ARMTargetLowering::addTypeForNEON method for details. - setOperationAction(ISD::FDIV, MVT::v2f64, Expand); - setOperationAction(ISD::FREM, MVT::v2f64, Expand); - // FIXME: Create unittest. + // FIXME: Create unittest for FCOPYSIGN. // In another words, find a way when "copysign" appears in DAG with vector // operands. - setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Expand); // FIXME: Code duplication: SETCC has custom operation action, see // ARMTargetLowering::addTypeForNEON method for details. - setOperationAction(ISD::SETCC, MVT::v2f64, Expand); // FIXME: Create unittest for FNEG and for FABS. 
- setOperationAction(ISD::FNEG, MVT::v2f64, Expand); - setOperationAction(ISD::FABS, MVT::v2f64, Expand); - setOperationAction(ISD::FSQRT, MVT::v2f64, Expand); - setOperationAction(ISD::FSIN, MVT::v2f64, Expand); - setOperationAction(ISD::FCOS, MVT::v2f64, Expand); - setOperationAction(ISD::FPOW, MVT::v2f64, Expand); - setOperationAction(ISD::FLOG, MVT::v2f64, Expand); - setOperationAction(ISD::FLOG2, MVT::v2f64, Expand); - setOperationAction(ISD::FLOG10, MVT::v2f64, Expand); - setOperationAction(ISD::FEXP, MVT::v2f64, Expand); - setOperationAction(ISD::FEXP2, MVT::v2f64, Expand); // FIXME: Create unittest for FCEIL, FTRUNC, FRINT, FNEARBYINT, FFLOOR. - setOperationAction(ISD::FCEIL, MVT::v2f64, Expand); - setOperationAction(ISD::FTRUNC, MVT::v2f64, Expand); - setOperationAction(ISD::FRINT, MVT::v2f64, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); - setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); - setOperationAction(ISD::FMA, MVT::v2f64, Expand); - - setOperationAction(ISD::FSQRT, MVT::v4f32, Expand); - setOperationAction(ISD::FSIN, MVT::v4f32, Expand); - setOperationAction(ISD::FCOS, MVT::v4f32, Expand); - setOperationAction(ISD::FPOW, MVT::v4f32, Expand); - setOperationAction(ISD::FLOG, MVT::v4f32, Expand); - setOperationAction(ISD::FLOG2, MVT::v4f32, Expand); - setOperationAction(ISD::FLOG10, MVT::v4f32, Expand); - setOperationAction(ISD::FEXP, MVT::v4f32, Expand); - setOperationAction(ISD::FEXP2, MVT::v4f32, Expand); - setOperationAction(ISD::FCEIL, MVT::v4f32, Expand); - setOperationAction(ISD::FTRUNC, MVT::v4f32, Expand); - setOperationAction(ISD::FRINT, MVT::v4f32, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Expand); - setOperationAction(ISD::FFLOOR, MVT::v4f32, Expand); + setFPOperationsExpand(MVT::v2f64); + + setFPFunctionsExpand(MVT::v4f32); // Mark v2f32 intrinsics. 
- setOperationAction(ISD::FSQRT, MVT::v2f32, Expand); - setOperationAction(ISD::FSIN, MVT::v2f32, Expand); - setOperationAction(ISD::FCOS, MVT::v2f32, Expand); - setOperationAction(ISD::FPOW, MVT::v2f32, Expand); - setOperationAction(ISD::FLOG, MVT::v2f32, Expand); - setOperationAction(ISD::FLOG2, MVT::v2f32, Expand); - setOperationAction(ISD::FLOG10, MVT::v2f32, Expand); - setOperationAction(ISD::FEXP, MVT::v2f32, Expand); - setOperationAction(ISD::FEXP2, MVT::v2f32, Expand); - setOperationAction(ISD::FCEIL, MVT::v2f32, Expand); - setOperationAction(ISD::FTRUNC, MVT::v2f32, Expand); - setOperationAction(ISD::FRINT, MVT::v2f32, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Expand); - setOperationAction(ISD::FFLOOR, MVT::v2f32, Expand); + setFPFunctionsExpand(MVT::v2f32); // Neon does not support some operations on v1i64 and v2i64 types. setOperationAction(ISD::MUL, MVT::v1i64, Expand); @@ -732,30 +706,7 @@ // operations, f64 is legal for the few double-precision instructions which // are present However, no double-precision operations other than moves, // loads and stores are provided by the hardware. 
- setOperationAction(ISD::FADD, MVT::f64, Expand); - setOperationAction(ISD::FSUB, MVT::f64, Expand); - setOperationAction(ISD::FMUL, MVT::f64, Expand); - setOperationAction(ISD::FMA, MVT::f64, Expand); - setOperationAction(ISD::FDIV, MVT::f64, Expand); - setOperationAction(ISD::FREM, MVT::f64, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); - setOperationAction(ISD::FGETSIGN, MVT::f64, Expand); - setOperationAction(ISD::FNEG, MVT::f64, Expand); - setOperationAction(ISD::FABS, MVT::f64, Expand); - setOperationAction(ISD::FSQRT, MVT::f64, Expand); - setOperationAction(ISD::FSIN, MVT::f64, Expand); - setOperationAction(ISD::FCOS, MVT::f64, Expand); - setOperationAction(ISD::FPOW, MVT::f64, Expand); - setOperationAction(ISD::FLOG, MVT::f64, Expand); - setOperationAction(ISD::FLOG2, MVT::f64, Expand); - setOperationAction(ISD::FLOG10, MVT::f64, Expand); - setOperationAction(ISD::FEXP, MVT::f64, Expand); - setOperationAction(ISD::FEXP2, MVT::f64, Expand); - setOperationAction(ISD::FCEIL, MVT::f64, Expand); - setOperationAction(ISD::FTRUNC, MVT::f64, Expand); - setOperationAction(ISD::FRINT, MVT::f64, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); - setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + setFPOperationsExpand(MVT::f64); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); Index: test/CodeGen/ARM/fp16-promote.ll =================================================================== --- test/CodeGen/ARM/fp16-promote.ll +++ test/CodeGen/ARM/fp16-promote.ll @@ -820,15 +820,15 @@ ; CHECK-ALL-LABEL: test_insertelement: ; CHECK-ALL: sub sp, sp, #8 -; CHECK-VFP: and -; CHECK-VFP: mov -; CHECK-VFP: ldrd -; CHECK-VFP: orr -; CHECK-VFP: ldrh -; CHECK-VFP: stm -; CHECK-VFP: strh -; CHECK-VFP: ldm -; CHECK-VFP: stm +; CHECK-VFP: and +; CHECK-VFP: mov +; CHECK-VFP: vldr +; CHECK-VFP: orr +; CHECK-VFP: ldrh +; CHECK-VFP: vstr +; 
CHECK-VFP: strh +; CHECK-VFP: vldr +; CHECK-VFP: vstr ; CHECK-NOVFP: ldrh ; CHECK-NOVFP: ldrh @@ -860,15 +860,15 @@ } ; CHECK-ALL-LABEL: test_extractelement: -; CHECK-VFP: push {{{.*}}, lr} ; CHECK-VFP: sub sp, sp, #8 -; CHECK-VFP: ldrd +; CHECK-VFP: vldr +; CHECK-VFP: and ; CHECK-VFP: mov ; CHECK-VFP: orr +; CHECK-VFP: vstr ; CHECK-VFP: ldrh ; CHECK-VFP: strh ; CHECK-VFP: add sp, sp, #8 -; CHECK-VFP: pop {{{.*}}, pc} ; CHECK-NOVFP: ldrh ; CHECK-NOVFP: strh ; CHECK-NOVFP: ldrh Index: test/CodeGen/ARM/fp16-v3.ll =================================================================== --- test/CodeGen/ARM/fp16-v3.ll +++ test/CodeGen/ARM/fp16-v3.ll @@ -11,10 +11,8 @@ ; CHECK: vadd.f32 [[SREG5:s[0-9]+]], [[SREG4]], [[SREG1]] ; CHECK-NEXT: vcvtb.f16.f32 [[SREG6:s[0-9]+]], [[SREG5]] ; CHECK-NEXT: vmov [[RREG1:r[0-9]+]], [[SREG6]] -; CHECK-DAG: uxth [[RREG2:r[0-9]+]], [[RREG1]] -; CHECK-DAG: pkhbt [[RREG3:r[0-9]+]], [[RREG1]], [[RREG1]], lsl #16 ; CHECK-DAG: strh [[RREG1]], [r0, #4] -; CHECK-DAG: vmov [[DREG:d[0-9]+]], [[RREG3]], [[RREG2]] +; CHECK-DAG: vdup.16 [[DREG:d[0-9]+]], [[RREG1]] ; CHECK-DAG: vst1.32 {[[DREG]][0]}, [r0:32] ; CHECK-NEXT: bx lr define void @test_vec3(<3 x half>* %arr, i32 %i) #0 { @@ -28,11 +26,9 @@ } ; CHECK-LABEL: test_bitcast: -; CHECK: vcvtb.f16.f32 -; CHECK: vcvtb.f16.f32 -; CHECK: vcvtb.f16.f32 -; CHECK: pkhbt -; CHECK: uxth +; CHECK-DAG: vst1.16 +; CHECK-DAG: vst1.32 +; CHECK: bx lr define void @test_bitcast(<3 x half> %inp, <3 x i16>* %arr) #0 { %bc = bitcast <3 x half> %inp to <3 x i16> store <3 x i16> %bc, <3 x i16>* %arr, align 8 Index: test/CodeGen/ARM/vfp16-calling-conv.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/vfp16-calling-conv.ll @@ -0,0 +1,33 @@ +; RUN: llc < %s | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "armv7-none--eabi" + +@v = local_unnamed_addr global <4 x half> zeroinitializer, align 8 + +declare void 
@callee(<4 x half>) #0 + +; CHECK-LABEL: test_soften: +; CHECK: vldr [[DREG:d[0-9]+]], {{\[r[0-9]+]}} +; CHECK-NEXT: vmov r0, r1, [[DREG]] +; CHECK-NEXT: b callee +define void @test_soften() #0 { +entry: + %0 = load <4 x half>, <4 x half>* @v, align 8 + tail call void (<4 x half>) @callee(<4 x half> %0) + ret void +} + +; CHECK-LABEL: test_illegal_op: +; CHECK: vadd.f32 +; CHECK: vadd.f32 +; CHECK: vadd.f32 +; CHECK: vadd.f32 +; CHECK: b callee +define void @test_illegal_op(<4 x half> %a, <4 x half> %b) #0 { + %c = fadd <4 x half> %a, %b + tail call void (<4 x half>) @callee(<4 x half> %c) + ret void +} + +attributes #0 = { nounwind } Index: test/Transforms/LoopVectorize/ARM/interleaved_cost.ll =================================================================== --- test/Transforms/LoopVectorize/ARM/interleaved_cost.ll +++ test/Transforms/LoopVectorize/ARM/interleaved_cost.ll @@ -121,12 +121,12 @@ br label %for.body ; VF_4-LABEL: Checking a loop in "half_factor_2" -; VF_4: Found an estimated cost of 40 for VF 4 For instruction: %tmp2 = load half, half* %tmp0, align 2 +; VF_4: Found an estimated cost of 33 for VF 4 For instruction: %tmp2 = load half, half* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, half* %tmp1, align 2 ; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, half* %tmp0, align 2 ; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: store half 0xH0000, half* %tmp1, align 2 ; VF_8-LABEL: Checking a loop in "half_factor_2" -; VF_8: Found an estimated cost of 80 for VF 8 For instruction: %tmp2 = load half, half* %tmp0, align 2 +; VF_8: Found an estimated cost of 66 for VF 8 For instruction: %tmp2 = load half, half* %tmp0, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load half, half* %tmp1, align 2 ; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, half* %tmp0, align 2 ; 
VF_8-NEXT: Found an estimated cost of 64 for VF 8 For instruction: store half 0xH0000, half* %tmp1, align 2