Index: llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -197,6 +197,10 @@ bool tryT2IndexedLoad(SDNode *N); bool tryMVEIndexedLoad(SDNode *N); bool tryFMULFixed(SDNode *N, SDLoc dl); + bool tryFP_TO_INT(SDNode *N, SDLoc dl); + bool transformFixedFloatingPointConversion(SDNode *N, SDLoc dl, SDNode *FMul, + bool IsUnsigned, + bool FixedToFloat); /// SelectVLD - Select NEON load intrinsics. NumVecs should be /// 1, 2, 3 or 4. The opcode arrays specify the instructions used for @@ -3150,47 +3154,39 @@ return false; } -bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) { - // Transform a fixed-point to floating-point conversion to a VCVT - if (!Subtarget->hasMVEFloatOps()) - return false; +bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N, SDLoc dl, + SDNode *FMul, + bool IsUnsigned, + bool FixedToFloat) { auto Type = N->getValueType(0); - if (!Type.isVector()) - return false; - auto ScalarType = Type.getVectorElementType(); unsigned ScalarBits = ScalarType.getSizeInBits(); - auto LHS = N->getOperand(0); - auto RHS = N->getOperand(1); - if (ScalarBits > 32) return false; - if (RHS.getOpcode() == ISD::BITCAST) { - if (RHS.getValueType().getVectorElementType().getSizeInBits() != ScalarBits) - return false; - RHS = RHS.getOperand(0); - } - if (RHS.getValueType().getVectorElementType().getSizeInBits() != ScalarBits) - return false; - if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP) - return false; - - bool IsUnsigned = LHS.getOpcode() == ISD::UINT_TO_FP; - SDNodeFlags FMulFlags = N->getFlags(); + SDNodeFlags FMulFlags = FMul->getFlags(); // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is // allowed in 16 bit unsigned floats if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned) return false; + auto ImmNode = FMul->getOperand(1); + auto VecVal = 
FMul->getOperand(0); + + if (VecVal.getValueType().getVectorElementType().getSizeInBits() != + ScalarBits) + return false; + if (ImmNode.getOpcode() == ISD::BITCAST) + ImmNode = ImmNode.getOperand(0); + APFloat ImmAPF(0.0f); - switch (RHS.getOpcode()) { + switch (ImmNode.getOpcode()) { case ARMISD::VMOVIMM: case ARMISD::VDUP: { - if (!isa(RHS.getOperand(0))) + if (!isa(ImmNode.getOperand(0))) return false; - unsigned Imm = RHS.getConstantOperandVal(0); - if (RHS.getOpcode() == ARMISD::VMOVIMM) + unsigned Imm = ImmNode.getConstantOperandVal(0); + if (ImmNode.getOpcode() == ARMISD::VMOVIMM) Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits); ImmAPF = APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(), @@ -3198,24 +3194,25 @@ break; } case ARMISD::VMOVFPIMM: { - ImmAPF = APFloat(ARM_AM::getFPImmFloat(RHS.getConstantOperandVal(0))); + ImmAPF = APFloat(ARM_AM::getFPImmFloat(ImmNode.getConstantOperandVal(0))); break; } default: return false; } - // Multiplying by a factor of 2^(-n) will convert from fixed point to - // floating point, where n is the number of fractional bits in the fixed - // point number. Taking the inverse and log2 of the factor will give n - APFloat Inverse(0.0f); - if (!ImmAPF.getExactInverse(&Inverse)) - return false; - + // Multiplying by a factor of 2^n will convert from floating point to + // fixed point, where n is the number of fractional bits in the fixed + // point number. 
Taking log2 of the factor will give n + APFloat ToConvert = ImmAPF; + if (FixedToFloat) { + if (!ImmAPF.getExactInverse(&ToConvert)) + return false; + } APSInt Converted(64, 0); bool IsExact; - Inverse.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven, - &IsExact); + ToConvert.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven, + &IsExact); if (!IsExact || !Converted.isPowerOf2()) return false; @@ -3223,18 +3220,23 @@ if (FracBits > ScalarBits) return false; - auto SintToFpOperand = LHS.getOperand(0); - SmallVector Ops{SintToFpOperand, + SmallVector Ops{VecVal->getOperand(0), CurDAG->getConstant(FracBits, dl, MVT::i32)}; AddEmptyMVEPredicateToOps(Ops, dl, Type); unsigned int Opcode; switch (ScalarBits) { case 16: - Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix; + if (FixedToFloat) + Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix; + else + Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix; break; case 32: - Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix; + if (FixedToFloat) + Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix; + else + Opcode = IsUnsigned ? 
ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
     break;
   default:
     llvm_unreachable("unexpected number of scalar bits");
@@ -3245,6 +3247,84 @@
   return true;
 }
 
+/// Try to select a vector FP_TO_SINT/FP_TO_UINT whose operand was scaled by a
+/// power of two as a single MVE floating-point to fixed-point VCVT.
+///
+/// FP_TO_[U|S]INT(FADD(x, x)) is selected here with one fractional bit, and
+/// FP_TO_[U|S]INT(FMUL(x, y)) is passed on to
+/// transformFixedFloatingPointConversion. Returns false without replacing N
+/// here when the subtarget lacks MVE float ops, the result type is not a
+/// vector, or the operand matches neither shape.
+bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
+  // Transform a floating-point to fixed-point conversion to a VCVT
+  if (!Subtarget->hasMVEFloatOps())
+    return false;
+  auto Type = N->getValueType(0);
+  if (!Type.isVector())
+    return false;
+  unsigned int ScalarBits = Type.getVectorElementType().getSizeInBits();
+
+  bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT;
+  auto Node = N->getOperand(0).getNode();
+
+  // floating-point to fixed-point with one fractional bit gets turned into an
+  // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y))
+  if (Node->getOpcode() == ISD::FADD) {
+    if (Node->getOperand(0) != Node->getOperand(1))
+      return false;
+    SDNodeFlags Flags = Node->getFlags();
+    // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
+    // allowed in 16 bit unsigned floats
+    if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
+      return false;
+
+    unsigned Opcode;
+    switch (ScalarBits) {
+    case 16:
+      Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
+      break;
+    case 32:
+      Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
+      break;
+    default:
+      // No VCVT opcode is picked for other lane widths; bail out before the
+      // node is built so Opcode is never read uninitialized.
+      return false;
+    }
+    SmallVector Ops{Node->getOperand(0),
+                    CurDAG->getConstant(1, dl, MVT::i32)};
+    AddEmptyMVEPredicateToOps(Ops, dl, Type);
+
+    ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops));
+    return true;
+  }
+
+  if (Node->getOpcode() != ISD::FMUL) {
+    return false;
+  }
+
+  return transformFixedFloatingPointConversion(N, dl, Node, IsUnsigned, false);
+}
+
+/// Try to select FMUL(SINT_TO_FP/UINT_TO_FP(x), y) on a vector as an MVE
+/// fixed-point to floating-point VCVT; the remaining operand checks are
+/// delegated to transformFixedFloatingPointConversion.
+bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
+  // Transform a fixed-point to floating-point conversion to a VCVT
+  if (!Subtarget->hasMVEFloatOps())
+    return false;
+  auto Type = N->getValueType(0);
+  if (!Type.isVector())
+    return false;
+
+  auto LHS = N->getOperand(0);
+  if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
+    return false;
+
+  return transformFixedFloatingPointConversion(
+      N, dl, N, LHS.getOpcode() == ISD::UINT_TO_FP, true);
+}
+
 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
   if (!Subtarget->hasV6T2Ops())
     return false;
@@ -3680,6 +3760,11 @@
     if (tryV6T2BitfieldExtractOp(N, true))
       return;
     break;
+  case ISD::FP_TO_UINT:
+  case ISD::FP_TO_SINT:
+    if (tryFP_TO_INT(N, dl))
+      return;
+    break;
   case ISD::FMUL:
     if (tryFMULFixed(N, dl))
       return;
Index: llvm/test/CodeGen/Thumb2/mve-vcvt-float-to-fixed.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/Thumb2/mve-vcvt-float-to-fixed.ll
@@ -0,0 +1,1032 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi %s -o - -mattr=+mve.fp | FileCheck %s
+
+define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_1(<4 x float> %0) {
+; CHECK-LABEL: vcvt_i32_1:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vcvt.s32.f32 q0, q0, #1
+; CHECK-NEXT: bx lr
+ %2 = fmul fast <4 x float> %0,
+ %3 = fptosi <4 x float> %2 to <4 x i32>
+ ret <4 x i32> %3
+}
+
+define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_2(<4 x float> %0) {
+; CHECK-LABEL: 
vcvt_i32_2: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #2 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_3(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_3: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #3 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_4(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_4: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #4 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_5(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_5: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #5 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_6(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_6: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #6 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_7(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_7: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #7 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_8(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #8 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_9(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_9: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #9 +; CHECK-NEXT: bx lr + %2 = fmul fast 
<4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_10(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_10: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #10 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_11(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_11: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #11 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_12(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_12: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #12 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_13(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_13: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #13 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_14(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_14: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #14 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_15(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_15: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #15 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_16(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #16 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + 
+define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_17(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_17: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #17 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_18(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_18: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #18 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_19(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_19: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #19 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_20(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_20: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #20 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_21(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_21: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #21 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_22(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_22: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #22 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_23(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_23: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #23 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_24(<4 x float> %0) { +; CHECK-LABEL: 
vcvt_i32_24: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #24 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_25(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_25: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #25 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_26(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_26: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #26 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_27(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_27: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #27 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_28(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_28: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #28 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_29(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_29: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #29 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_30(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_30: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s32.f32 q0, q0, #30 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_31(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_31: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q1, #0xcf000000 +; CHECK-NEXT: 
vmul.f32 q0, q0, q1 +; CHECK-NEXT: vcvt.s32.f32 q0, q0 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_32(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_32: +; CHECK: @ %bb.0: +; CHECK-NEXT: mov.w r0, #796917760 +; CHECK-NEXT: vmul.f32 q0, q0, r0 +; CHECK-NEXT: vcvt.s32.f32 q0, q0 +; CHECK-NEXT: bx lr + %2 = fmul <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_i32_33(<4 x float> %0) { +; CHECK-LABEL: vcvt_i32_33: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q1, #0x2f000000 +; CHECK-NEXT: vmul.f32 q0, q0, q1 +; CHECK-NEXT: vcvt.s32.f32 q0, q0 +; CHECK-NEXT: bx lr + %2 = fmul <4 x float> %0, + %3 = fptosi <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_1(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_1: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #1 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_2(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_2: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #2 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_3(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_3: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #3 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_4(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_4: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #4 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_5(<8 x half> %0) { +; CHECK-LABEL: 
vcvt_i16_5: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #5 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_6(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_6: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #6 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_7(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_7: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #7 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_8(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #8 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_9(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_9: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #9 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_10(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_10: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #10 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_11(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_11: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #11 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_12(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_12: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #12 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> 
%0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_13(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_13: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #13 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_14(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_14: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #14 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_i16_15(<8 x half> %0) { +; CHECK-LABEL: vcvt_i16_15: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #15 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_1(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_1: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #1 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_2(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_2: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #2 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_3(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_3: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #3 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_4(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_4: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #4 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> 
@vcvt_u32_5(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_5: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #5 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_6(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_6: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #6 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_7(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_7: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #7 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_8(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #8 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_9(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_9: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #9 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_10(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_10: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #10 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_11(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_11: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #11 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_12(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_12: +; CHECK: @ %bb.0: +; CHECK-NEXT: 
vcvt.u32.f32 q0, q0, #12 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_13(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_13: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #13 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_14(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_14: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #14 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_15(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_15: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #15 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_16(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #16 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_17(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_17: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #17 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_18(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_18: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #18 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_19(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_19: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #19 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = 
fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_20(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_20: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #20 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_21(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_21: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #21 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_22(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_22: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #22 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_23(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_23: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #23 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_24(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_24: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #24 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_25(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_25: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #25 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_26(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_26: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #26 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x 
i32> @vcvt_u32_27(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_27: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #27 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_28(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_28: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #28 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_29(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_29: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #29 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_30(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_30: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u32.f32 q0, q0, #30 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x i32> @vcvt_u32_31(<4 x float> %0) { +; CHECK-LABEL: vcvt_u32_31: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q1, #0xcf000000 +; CHECK-NEXT: vmul.f32 q0, q0, q1 +; CHECK-NEXT: vcvt.u32.f32 q0, q0 +; CHECK-NEXT: bx lr + %2 = fmul fast <4 x float> %0, + %3 = fptoui <4 x float> %2 to <4 x i32> + ret <4 x i32> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_32(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #32 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_u32_33(<4 x i32> %0) { +; CHECK-LABEL: vcvt_u32_33: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q1, #0x2f000000 +; CHECK-NEXT: vcvt.f32.u32 q0, q0 +; CHECK-NEXT: vmul.f32 q0, q0, q1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 
x float> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_1(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_1: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #1 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_2(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_2: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #2 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_3(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_3: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #3 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_4(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_4: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #4 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_5(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_5: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #5 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_6(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_6: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #6 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_7(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_7: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #7 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_8(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_8: +; CHECK: @ %bb.0: +; 
CHECK-NEXT: vcvt.u16.f16 q0, q0, #8 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_9(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_9: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #9 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_10(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_10: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #10 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_11(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_11: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #11 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_12(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_12: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #12 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_13(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_13: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #13 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_14(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_14: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #14 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_15(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_15: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.u16.f16 q0, q0, #15 +; CHECK-NEXT: bx lr + %2 = fmul fast <8 x half> %0, + %3 = fptoui <8 x 
half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_u16_inf(<8 x half> %0) { +; CHECK-LABEL: vcvt_u16_inf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i16 q1, #0x7800 +; CHECK-NEXT: vmul.f16 q0, q0, q1 +; CHECK-NEXT: vcvt.u16.f16 q0, q0 +; CHECK-NEXT: bx lr + %2 = fmul <8 x half> %0, + %3 = fptoui <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + +define arm_aapcs_vfpcc <8 x i16> @vcvt_s16_inf(<8 x half> %0) { +; CHECK-LABEL: vcvt_s16_inf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vcvt.s16.f16 q0, q0, #15 +; CHECK-NEXT: bx lr + %2 = fmul <8 x half> %0, + %3 = fptosi <8 x half> %2 to <8 x i16> + ret <8 x i16> %3 +} + + +define arm_aapcs_vfpcc <4 x float> @vcvt_bad_imm(<4 x i32> %0) { +; CHECK-LABEL: vcvt_bad_imm: +; CHECK: @ %bb.0: +; CHECK-NEXT: movw r0, #2048 +; CHECK-NEXT: vcvt.f32.s32 q0, q0 +; CHECK-NEXT: movt r0, #15104 +; CHECK-NEXT: vmul.f32 q0, q0, r0 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_negative(<4 x i32> %0) { +; CHECK-LABEL: vcvt_negative: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q1, #0xb8000000 +; CHECK-NEXT: vcvt.f32.s32 q0, q0 +; CHECK-NEXT: vmul.f32 q0, q0, q1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define arm_aapcs_vfpcc <4 x float> @vcvt_negative2(<4 x i32> %0) { +; CHECK-LABEL: vcvt_negative2: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov.i32 q1, #0xb0000000 +; CHECK-NEXT: vcvt.f32.s32 q0, q0 +; CHECK-NEXT: vmul.f32 q0, q0, q1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +}