Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h
@@ -794,6 +794,8 @@
 
     bool shouldConsiderGEPOffsetSplit() const override { return true; }
 
+    bool isUnsupportedFloatingType(EVT VT) const;
+
     SDValue getCMOV(const SDLoc &dl, EVT VT, SDValue FalseVal, SDValue TrueVal,
                     SDValue ARMcc, SDValue CCR, SDValue Cmp,
                     SelectionDAG &DAG) const;
Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
@@ -224,6 +224,13 @@
 void ARMTargetLowering::setAllExpand(MVT VT) {
   for (unsigned Opc = 0; Opc < ISD::BUILTIN_OP_END; ++Opc)
     setOperationAction(Opc, VT, Expand);
+
+  // We support these really simple operations even on types where all
+  // the actual arithmetic has to be broken down into simpler
+  // operations or turned into library calls.
+  setOperationAction(ISD::BITCAST, VT, Legal);
+  setOperationAction(ISD::LOAD, VT, Legal);
+  setOperationAction(ISD::STORE, VT, Legal);
 }
 
 void ARMTargetLowering::addAllExtLoads(const MVT From, const MVT To,
@@ -262,9 +269,6 @@
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
     setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
     setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Legal);
-    setOperationAction(ISD::BITCAST, VT, Legal);
-    setOperationAction(ISD::LOAD, VT, Legal);
-    setOperationAction(ISD::STORE, VT, Legal);
 
     if (HasMVEFP) {
       // No native support for these.
@@ -289,9 +293,6 @@
   for (auto VT : LongTypes) {
     addRegisterClass(VT, &ARM::QPRRegClass);
     setAllExpand(VT);
-    setOperationAction(ISD::BITCAST, VT, Legal);
-    setOperationAction(ISD::LOAD, VT, Legal);
-    setOperationAction(ISD::STORE, VT, Legal);
   }
 
   // It is legal to extload from v4i8 to v4i16 or v4i32.
@@ -594,10 +595,14 @@
   else
     addRegisterClass(MVT::i32, &ARM::GPRRegClass);
 
-  if (!Subtarget->useSoftFloat() && Subtarget->hasFPRegs() &&
-      !Subtarget->isThumb1Only()) {
+  if (!Subtarget->useSoftFloat() && !Subtarget->isThumb1Only() &&
+      Subtarget->hasFPRegs()) {
     addRegisterClass(MVT::f32, &ARM::SPRRegClass);
     addRegisterClass(MVT::f64, &ARM::DPRRegClass);
+    if (!Subtarget->hasVFP2Base())
+      setAllExpand(MVT::f32);
+    if (!Subtarget->hasFP64())
+      setAllExpand(MVT::f64);
   }
 
   if (Subtarget->hasFullFP16()) {
@@ -4544,6 +4549,16 @@
   return false;
 }
 
+bool ARMTargetLowering::isUnsupportedFloatingType(EVT VT) const {
+  if (VT == MVT::f32)
+    return !Subtarget->hasVFP2Base();
+  if (VT == MVT::f64)
+    return !Subtarget->hasFP64();
+  if (VT == MVT::f16)
+    return !Subtarget->hasFullFP16();
+  return false;
+}
+
 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
   SDLoc dl(Op);
@@ -4587,9 +4602,9 @@
   SDValue TrueVal = Op.getOperand(2);
   SDValue FalseVal = Op.getOperand(3);
 
-  if (!Subtarget->hasFP64() && LHS.getValueType() == MVT::f64) {
-    DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
-                                                    dl);
+  if (isUnsupportedFloatingType(LHS.getValueType())) {
+    DAG.getTargetLoweringInfo().softenSetCCOperands(
+        DAG, LHS.getValueType(), LHS, RHS, CC, dl);
 
     // If softenSetCCOperands only returned one value, we should compare it to
     // zero.
@@ -4828,9 +4843,9 @@
   SDValue Dest = Op.getOperand(4);
   SDLoc dl(Op);
 
-  if (!Subtarget->hasFP64() && LHS.getValueType() == MVT::f64) {
-    DAG.getTargetLoweringInfo().softenSetCCOperands(DAG, MVT::f64, LHS, RHS, CC,
-                                                    dl);
+  if (isUnsupportedFloatingType(LHS.getValueType())) {
+    DAG.getTargetLoweringInfo().softenSetCCOperands(
+        DAG, LHS.getValueType(), LHS, RHS, CC, dl);
 
     // If softenSetCCOperands only returned one value, we should compare it to
     // zero.
@@ -4975,7 +4990,7 @@
   EVT VT = Op.getValueType();
   if (VT.isVector())
     return LowerVectorFP_TO_INT(Op, DAG);
-  if (!Subtarget->hasFP64() && Op.getOperand(0).getValueType() == MVT::f64) {
+  if (isUnsupportedFloatingType(Op.getOperand(0).getValueType())) {
     RTLIB::Libcall LC;
     if (Op.getOpcode() == ISD::FP_TO_SINT)
       LC = RTLIB::getFPTOSINT(Op.getOperand(0).getValueType(),
@@ -5039,7 +5054,7 @@
   EVT VT = Op.getValueType();
   if (VT.isVector())
     return LowerVectorINT_TO_FP(Op, DAG);
-  if (!Subtarget->hasFP64() && Op.getValueType() == MVT::f64) {
+  if (isUnsupportedFloatingType(VT)) {
     RTLIB::Libcall LC;
     if (Op.getOpcode() == ISD::SINT_TO_FP)
       LC = RTLIB::getSINTTOFP(Op.getOperand(0).getValueType(),
Index: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
+++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
@@ -2269,13 +2269,13 @@
                     IIC_fpUNA64,
                     [(set (f64 DPR:$Dd),
                           (ARMcmov DPR:$Dn, DPR:$Dm, cmovpred:$p))]>,
-               RegConstraint<"$Dn = $Dd">, Requires<[HasVFP2,HasDPVFP]>;
+               RegConstraint<"$Dn = $Dd">, Requires<[HasFPRegs64]>;
 
 def VMOVScc  : PseudoInst<(outs SPR:$Sd), (ins SPR:$Sn, SPR:$Sm, cmovpred:$p),
                     IIC_fpUNA32,
                     [(set (f32 SPR:$Sd),
                           (ARMcmov SPR:$Sn, SPR:$Sm, cmovpred:$p))]>,
-               RegConstraint<"$Sn = $Sd">, Requires<[HasVFP2]>;
+               RegConstraint<"$Sn = $Sd">, Requires<[HasFPRegs]>;
 } // hasSideEffects
 
 //===----------------------------------------------------------------------===//
Index: llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll
+++ llvm/trunk/test/CodeGen/ARM/fp16-instructions.ll
@@ -1,6 +1,8 @@
 ; SOFT:
 ; RUN: llc < %s -mtriple=arm-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
 ; RUN: llc < %s -mtriple=thumb-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
+; RUN: llc < %s -mtriple=thumbv8.1m.main-none-eabi -mattr=+mve | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
+; RUN: llc < %s -mtriple=thumbv8.1m.main-none-eabi -float-abi=soft -mattr=+mve | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
 
 ; SOFTFP:
 ; RUN: llc < %s -mtriple=arm-none-eabi -mattr=+vfp3 | FileCheck %s --check-prefixes=CHECK,CHECK-SOFTFP-VFP3
@@ -206,8 +208,8 @@
 
 ; CHECK-LABEL: VCMPBRCC:
 
-; CHECK-SOFT: bl __aeabi_fcmpgt
-; CHECK-SOFT: cmp r0, #0
+; CHECK-SOFT: bl __aeabi_fcmp{{gt|le}}
+; CHECK-SOFT: cmp r0, #{{0|1}}
 
 ; CHECK-SOFTFP-FP16: vcvtb.f32.f16 [[S2:s[0-9]]], [[S2]]
 ; CHECK-SOFTFP-FP16: vcmpe.f32 [[S2]], s0
Index: llvm/trunk/test/CodeGen/Thumb2/float-ops.ll
===================================================================
--- llvm/trunk/test/CodeGen/Thumb2/float-ops.ll
+++ llvm/trunk/test/CodeGen/Thumb2/float-ops.ll
@@ -1,12 +1,13 @@
-; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=NONE
+; RUN: llc < %s -mtriple=thumbv7-none-eabi -mcpu=cortex-m3 | FileCheck %s -check-prefix=CHECK -check-prefix=NONE -check-prefix=NOREGS
 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m4 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -check-prefix=VFP4-ALL
 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=FP-ARMv8
 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a8 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=VFP4-ALL -check-prefix=VFP4-DP
+; RUN: llc < %s -mtriple=thumbv8.1m.main-none-eabihf -mattr=+mve | FileCheck %s -check-prefix=CHECK -check-prefix=NONE -check-prefix=ONLYREGS
 
 define float @add_f(float %a, float %b) {
 entry:
 ; CHECK-LABEL: add_f:
-; NONE: bl __aeabi_fadd
+; NONE: {{b|bl}} __aeabi_fadd
 ; HARD: vadd.f32 s0, s0, s1
   %0 = fadd float %a, %b
   ret float %0
@@ -15,8 +16,8 @@
 define double @add_d(double %a, double %b) {
 entry:
 ; CHECK-LABEL: add_d:
-; NONE: bl __aeabi_dadd
-; SP: bl __aeabi_dadd
+; NONE: {{b|bl}} __aeabi_dadd
+; SP: {{b|bl}} __aeabi_dadd
 ; DP: vadd.f64 d0, d0, d1
   %0 = fadd double %a, %b
   ret double %0
@@ -25,7 +26,7 @@
 define float @sub_f(float %a, float %b) {
 entry:
 ; CHECK-LABEL: sub_f:
-; NONE: bl __aeabi_fsub
+; NONE: {{b|bl}} __aeabi_fsub
 ; HARD: vsub.f32 s
   %0 = fsub float %a, %b
   ret float %0
@@ -34,8 +35,8 @@
 define double @sub_d(double %a, double %b) {
 entry:
 ; CHECK-LABEL: sub_d:
-; NONE: bl __aeabi_dsub
-; SP: bl __aeabi_dsub
+; NONE: {{b|bl}} __aeabi_dsub
+; SP: {{b|bl}} __aeabi_dsub
 ; DP: vsub.f64 d0, d0, d1
   %0 = fsub double %a, %b
   ret double %0
@@ -44,7 +45,7 @@
 define float @mul_f(float %a, float %b) {
 entry:
 ; CHECK-LABEL: mul_f:
-; NONE: bl __aeabi_fmul
+; NONE: {{b|bl}} __aeabi_fmul
 ; HARD: vmul.f32 s
   %0 = fmul float %a, %b
   ret float %0
@@ -53,8 +54,8 @@
 define double @mul_d(double %a, double %b) {
 entry:
 ; CHECK-LABEL: mul_d:
-; NONE: bl __aeabi_dmul
-; SP: bl __aeabi_dmul
+; NONE: {{b|bl}} __aeabi_dmul
+; SP: {{b|bl}} __aeabi_dmul
 ; DP: vmul.f64 d0, d0, d1
   %0 = fmul double %a, %b
   ret double %0
@@ -63,7 +64,7 @@
 define float @div_f(float %a, float %b) {
 entry:
 ; CHECK-LABEL: div_f:
-; NONE: bl __aeabi_fdiv
+; NONE: {{b|bl}} __aeabi_fdiv
 ; HARD: vdiv.f32 s
   %0 = fdiv float %a, %b
   ret float %0
@@ -72,8 +73,8 @@
 define double @div_d(double %a, double %b) {
 entry:
 ; CHECK-LABEL: div_d:
-; NONE: bl __aeabi_ddiv
-; SP: bl __aeabi_ddiv
+; NONE: {{b|bl}} __aeabi_ddiv
+; SP: {{b|bl}} __aeabi_ddiv
 ; DP: vdiv.f64 d0, d0, d1
   %0 = fdiv double %a, %b
   ret double %0
@@ -109,7 +110,8 @@
 define double @load_d(double* %a) {
 entry:
 ; CHECK-LABEL: load_d:
-; NONE: ldm r0, {r0, r1}
+; NOREGS: ldm r0, {r0, r1}
+; ONLYREGS: vldr d0, [r0]
 ; HARD: vldr d0, [r0]
   %0 = load double, double* %a, align 8
   ret double %0
@@ -127,7 +129,8 @@
 define void @store_d(double* %a, double %b) {
 entry:
 ; CHECK-LABEL: store_d:
-; NONE: strd r2, r3, [r0]
+; NOREGS: strd r2, r3, [r0]
+; ONLYREGS: vstr d0, [r0]
 ; HARD: vstr d0, [r0]
   store double %b, double* %a, align 8
   ret void
@@ -259,8 +262,10 @@
 
 define float @select_f(float %a, float %b, i1 %c) {
 ; CHECK-LABEL: select_f:
-; NONE: lsls r2, r2, #31
-; NONE: moveq r0, r1
+; NOREGS: lsls r2, r2, #31
+; NOREGS: moveq r0, r1
+; ONLYREGS: lsls r2, r2, #31
+; ONLYREGS: vmovne.f32 s2, s0
 ; HARD: lsls r0, r0, #31
 ; VFP4-ALL: vmovne.f32 s1, s0
 ; VFP4-ALL: vmov.f32 s0, s1
@@ -273,8 +278,8 @@
 ; CHECK-LABEL: select_d:
 ; NONE: ldr{{(.w)?}} [[REG:r[0-9]+]], [sp]
 ; NONE ands [[REG]], [[REG]], #1
-; NONE: moveq r0, r2
-; NONE: moveq r1, r3
+; NONE-DAG: moveq r0, r2
+; NONE-DAG: moveq r1, r3
 ; SP: ands r0, r0, #1
 ; SP-DAG: vmov [[ALO:r[0-9]+]], [[AHI:r[0-9]+]], d0
 ; SP-DAG: vmov [[BLO:r[0-9]+]], [[BHI:r[0-9]+]], d1
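
For illustration only (not part of the patch): the standalone IR below is a minimal sketch of the behaviour the new ONLYREGS checks describe, assuming the same thumbv8.1m.main MVE hard-float configuration added in the RUN lines above. The function names are made up for this sketch, and the expectations simply mirror the existing load_d and add_d checks in float-ops.ll: f64 loads and stores stay legal and can use a D register, while f64 arithmetic is still softened to an AEABI libcall.

; Sketch, not a committed test. Expectations mirror load_d/add_d above.
; RUN: llc < %s -mtriple=thumbv8.1m.main-none-eabihf -mattr=+mve | FileCheck %s

; The f64 value can live in a D register even without double-precision FP...
define double @sketch_load_d(double* %p) {
entry:
; CHECK-LABEL: sketch_load_d:
; CHECK: vldr d0, [r0]
  %v = load double, double* %p, align 8
  ret double %v
}

; ...while the arithmetic itself is still turned into a runtime call.
define double @sketch_add_d(double %a, double %b) {
entry:
; CHECK-LABEL: sketch_add_d:
; CHECK: {{b|bl}} __aeabi_dadd
  %0 = fadd double %a, %b
  ret double %0
}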