Index: llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -34,6 +34,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/APSInt.h"
 
 using namespace llvm;
 
@@ -3581,6 +3582,100 @@
     if (tryV6T2BitfieldExtractOp(N, true))
       return;
     break;
+  case ISD::FMUL: {
+    // Transform a fixed-point to floating-point conversion to a VCVT:
+    //   fmul (sitofp/uitofp x), 2^-n  -->  vcvt.fXX.[su]XX x, #n
+    if (!Subtarget->hasMVEFloatOps())
+      break;
+    auto Type = N->getValueType(0);
+    if (!Type.isVector())
+      break;
+
+    auto ScalarType = Type.getVectorElementType();
+    unsigned ScalarBits = ScalarType.getSizeInBits();
+    auto LHS = N->getOperand(0);
+    auto RHS = N->getOperand(1);
+
+    // MVE only has f16 and f32 fixed-point conversions.
+    if (ScalarBits > 32)
+      break;
+
+    SDNodeFlags FMulFlags = N->getFlags();
+    // The fixed-point vcvt and vcvt+vmul are not equivalent if inf is allowed
+    // in 16 bit floats
+    if (ScalarBits == 16 && !FMulFlags.hasNoInfs())
+      break;
+
+    if (RHS.getOpcode() == ISD::BITCAST)
+      RHS = RHS.getOperand(0);
+    if (LHS.getOpcode() == ISD::SINT_TO_FP ||
+        LHS.getOpcode() == ISD::UINT_TO_FP) {
+      bool IsUnsigned = LHS.getOpcode() == ISD::UINT_TO_FP;
+
+      // Extract the splatted scale factor from the RHS constant node.
+      APFloat ImmAPF(0.0f);
+      bool valid = true;
+      switch (RHS.getOpcode()) {
+      case ARMISD::VMOVIMM:
+      case ARMISD::VDUP: {
+        unsigned Imm = RHS.getConstantOperandVal(0);
+        if (RHS.getOpcode() == ARMISD::VMOVIMM)
+          Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits);
+        ImmAPF = APFloat(ScalarBits == 32
+                             ? APFloat::IEEEsingle()
+                             : (ScalarBits == 16 ? APFloat::IEEEhalf()
+                                                 : APFloat::IEEEdouble()),
+                         APInt(ScalarBits, Imm));
+        break;
+      }
+      case ARMISD::VMOVFPIMM: {
+        ImmAPF = APFloat(ARM_AM::getFPImmFloat(RHS.getConstantOperandVal(0)));
+        break;
+      }
+      default:
+        // Fix: an unrecognised constant node must reject the transform.
+        // Previously `valid` stayed true here and the default ImmAPF (0.0f)
+        // was used, relying on later checks to fail by accident.
+        valid = false;
+        break;
+      }
+
+      if (!valid)
+        break;
+
+      // Multiplying by a factor of 2^(-n) will convert from fixed point to
+      // floating point, where n is the number of fractional bits in the fixed
+      // point number. Taking the inverse and log2 of the factor will give n
+      APFloat Inverse(0.0f);
+      if (!ImmAPF.getExactInverse(&Inverse))
+        break;
+
+      // Fix: use a 64-bit unsigned integer here. A signed ScalarBits-wide
+      // APSInt cannot represent 2^31 (the inverse of 2^-31 for f32), which
+      // would make convertToInteger inexact and wrongly reject e.g. the
+      // vcvt_i32_31 test below.
+      APSInt Converted(64, /*isUnsigned=*/true);
+      bool IsExact;
+      Inverse.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven,
+                               &IsExact);
+      if (!IsExact || !Converted.isPowerOf2())
+        break;
+
+      unsigned FracBits = Converted.logBase2();
+      // The instruction immediate only encodes 1..ScalarBits fractional bits.
+      if (FracBits == 0 || FracBits > ScalarBits)
+        break;
+
+      auto SintToFpOperand = LHS.getOperand(0);
+      SmallVector<SDValue, 3> Ops{SintToFpOperand,
+                                  CurDAG->getConstant(FracBits, dl, MVT::i32)};
+      AddEmptyMVEPredicateToOps(Ops, dl, Type);
+
+      unsigned int Opcode = ARM::MVE_VCVTf32s32_fix;
+      switch (ScalarBits) {
+      case 16:
+        Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
+        break;
+      case 32:
+        Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
+        break;
+      }
+
+      ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops));
+      return;
+    }
+    break;
+  }
   case ISD::MUL:
     if (Subtarget->isThumb1Only())
       break;
Index: llvm/test/CodeGen/ARM/arm_q15_to_float_autovec.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/arm_q15_to_float_autovec.ll
@@ -0,0 +1,1343 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -mtriple=thumbv8.1m.main-arm-none-eabi %s -o - -mattr=+mve.fp | FileCheck %s
+
+define dso_local <4 x float> @vcvt_i32_1(<4 x i32> %0) local_unnamed_addr {
+; CHECK-LABEL: vcvt_i32_1:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vmov d1, r2, r3
+; CHECK-NEXT:    vmov d0, r0, r1
+; CHECK-NEXT:    vcvt.f32.s32 q0, q0, #1
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+  %2 = sitofp <4 x i32> %0 to <4 x float>
+  %3 = fmul <4 x float> %2, <float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01>
+  ret <4 x float> %3
+}
+
+define dso_local <4 x float> @vcvt_i32_2(<4 x i32> %0) local_unnamed_addr {
+; CHECK-LABEL: vcvt_i32_2:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vmov d1, r2, r3
+; CHECK-NEXT:    vmov d0, r0, r1
+; CHECK-NEXT:    vcvt.f32.s32 q0, q0, #2
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+  %2 = sitofp <4 x i32> %0 to <4 x float>
+  %3 = fmul <4 x float> %2, <float 2.500000e-01, float 2.500000e-01, float 2.500000e-01, float 2.500000e-01>
+  ret <4 x float> %3
+}
+
+define dso_local <4 x float> @vcvt_i32_3(<4 x i32> %0) local_unnamed_addr {
+; CHECK-LABEL: vcvt_i32_3:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vmov d1, r2, r3
+; CHECK-NEXT:    vmov d0, r0, r1
+; CHECK-NEXT:    vcvt.f32.s32 q0, q0, #3
+; CHECK-NEXT:    vmov r0, r1, d0
+; CHECK-NEXT:    vmov r2, r3, d1
+; CHECK-NEXT:    bx lr
+  %2 = sitofp <4 x i32> %0 to <4 x float>
+  %3 = fmul <4 x float> %2, <float 1.250000e-01, float 1.250000e-01, float 1.250000e-01, float 1.250000e-01>
+  ret <4 x float> %3
+}
+
+define dso_local <4 x float> @vcvt_i32_4(<4 x i32> %0) local_unnamed_addr {
+; CHECK-LABEL: vcvt_i32_4:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vmov d1, r2, r3
+; CHECK-NEXT:    vmov d0, r0, r1
+;
CHECK-NEXT: vcvt.f32.s32 q0, q0, #4 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_5(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_5: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #5 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_6(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_6: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #6 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_7(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_7: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #7 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_8(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #8 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_9(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_9: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; 
CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #9 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_10(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_10: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #10 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_11(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_11: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #11 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_12(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_12: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #12 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_13(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_13: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #13 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_14(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_14: +; CHECK: @ %bb.0: 
+; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #14 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_15(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_15: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #15 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_16(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #16 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_17(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_17: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #17 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_18(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_18: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #18 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_19(<4 x i32> %0) local_unnamed_addr { +; 
CHECK-LABEL: vcvt_i32_19: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #19 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_20(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_20: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #20 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_21(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_21: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #21 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_22(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_22: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #22 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_23(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_23: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #23 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_24(<4 
x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_24: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #24 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_25(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_25: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #25 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_26(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_26: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #26 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_27(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_27: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #27 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_28(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_28: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #28 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define 
dso_local <4 x float> @vcvt_i32_29(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_29: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #29 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_30(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_30: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #30 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_i32_31(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_i32_31: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.s32 q0, q0, #31 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <8 x half> @vcvt_i16_1(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_i16_1: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #1 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_i16_2(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_i16_2: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #2 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> 
%2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_i16_3(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_i16_3: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #3 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_i16_4(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_i16_4: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #4 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_i16_5(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_i16_5: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #5 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_i16_6(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_i16_6: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #6 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_i16_7(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_i16_7: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #7 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 
to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_i16_8(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_i16_8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #8 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_i16_9(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_i16_9: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #9 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_i16_10(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_i16_10: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #10 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_i16_11(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_i16_11: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #11 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_i16_12(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_i16_12: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #12 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, 
d1 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_i16_13(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_i16_13: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #13 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_i16_14(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_i16_14: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.s16 q0, q0, #14 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_i16_15(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_i16_15: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov.i16 q1, #0x200 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.s16 q0, q0 +; CHECK-NEXT: vmul.f16 q0, q0, q1 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <4 x float> @vcvt_u32_1(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_1: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #1 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_2(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_2: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: 
vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #2 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_3(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_3: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #3 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_4(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_4: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #4 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_5(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_5: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #5 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_6(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_6: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #6 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_7(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_7: +; CHECK: @ %bb.0: +; 
CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #7 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_8(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #8 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_9(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_9: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #9 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_10(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_10: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #10 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_11(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_11: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #11 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_12(<4 x i32> %0) local_unnamed_addr #0 { +; 
CHECK-LABEL: vcvt_u32_12: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #12 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_13(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_13: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #13 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_14(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_14: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #14 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_15(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_15: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #15 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_16(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #16 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> 
@vcvt_u32_17(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_17: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #17 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_18(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_18: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #18 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_19(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_19: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #19 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_20(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_20: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #20 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_21(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_21: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #21 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret 
<4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_22(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_22: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #22 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_23(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_23: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #23 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_24(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_24: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #24 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_25(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_25: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #25 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_26(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_26: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #26 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> 
%0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_27(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_27: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #27 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_28(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_28: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #28 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_29(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_29: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #29 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_30(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_30: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #30 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <4 x float> @vcvt_u32_31(<4 x i32> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u32_31: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f32.u32 q0, q0, #31 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, 
r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +} + +define dso_local <8 x half> @vcvt_u16_1(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u16_1: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #1 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_u16_2(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u16_2: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #2 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_u16_3(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u16_3: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #3 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_u16_4(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u16_4: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #4 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_u16_5(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u16_5: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #5 +; CHECK-NEXT: 
vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_u16_6(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u16_6: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #6 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_u16_7(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u16_7: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #7 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_u16_8(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u16_8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #8 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_u16_9(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u16_9: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #9 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_u16_10(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u16_10: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; 
CHECK-NEXT: vcvt.f16.u16 q0, q0, #10 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_u16_11(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u16_11: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #11 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_u16_12(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u16_12: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #12 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_u16_13(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u16_13: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #13 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_u16_14(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u16_14: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.u16 q0, q0, #14 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_u16_15(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u16_15: +; CHECK: @ %bb.0: +; 
CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov.i16 q1, #0x200 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.u16 q0, q0 +; CHECK-NEXT: vmul.f16 q0, q0, q1 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul ninf <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_u16_inf(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_u16_inf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov.i16 q1, #0x400 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.u16 q0, q0 +; CHECK-NEXT: vmul.f16 q0, q0, q1 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = uitofp <8 x i16> %0 to <8 x half> + %3 = fmul <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <8 x half> @vcvt_s16_inf(<8 x i16> %0) local_unnamed_addr #0 { +; CHECK-LABEL: vcvt_s16_inf: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov.i16 q1, #0x400 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: vcvt.f16.s16 q0, q0 +; CHECK-NEXT: vmul.f16 q0, q0, q1 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <8 x i16> %0 to <8 x half> + %3 = fmul <8 x half> %2, + ret <8 x half> %3 +} + +define dso_local <4 x float> @vcvt_bad_imm(<4 x i32> %0) local_unnamed_addr { +; CHECK-LABEL: vcvt_bad_imm: +; CHECK: @ %bb.0: +; CHECK-NEXT: vmov d1, r2, r3 +; CHECK-NEXT: vmov d0, r0, r1 +; CHECK-NEXT: adr r0, .LCPI94_0 +; CHECK-NEXT: vldrw.u32 q1, [r0] +; CHECK-NEXT: vcvt.f32.s32 q0, q0 +; CHECK-NEXT: vmul.f32 q0, q0, q1 +; CHECK-NEXT: vmov r0, r1, d0 +; CHECK-NEXT: vmov r2, r3, d1 +; CHECK-NEXT: bx lr + %2 = sitofp <4 x i32> %0 to <4 x float> + %3 = fmul <4 x float> %2, + ret <4 x float> %3 +}