Index: llvm/docs/LangRef.rst =================================================================== --- llvm/docs/LangRef.rst +++ llvm/docs/LangRef.rst @@ -14754,6 +14754,60 @@ is returned. If the result overflows, the result is an infinity with the same sign. +'``llvm.frexp.*``' Intrinsic +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Syntax: +""""""" + +This is an overloaded intrinsic. You can use ``llvm.frexp`` on any +floating point or vector of floating point type. Not all targets support +all types however. + +:: + + declare { float, i32 } @llvm.frexp.f32.i32(float %Val) + declare { double, i32 } @llvm.frexp.f64.i32(double %Val) + declare { x86_fp80, i32 } @llvm.frexp.f80.i32(x86_fp80 %Val) + declare { fp128, i32 } @llvm.frexp.f128.i32(fp128 %Val) + declare { ppc_fp128, i32 } @llvm.frexp.ppcf128.i32(ppc_fp128 %Val) + declare { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> %Val) + +Overview: +""""""""" + +The '``llvm.frexp.*``' intrinsics perform the frexp function. + +Arguments: +"""""""""" + +The argument is a :ref:`floating-point <t_floating>` or +:ref:`vector <t_vector>` of floating-point values. Returns two values +in a struct. The first struct field matches the argument type, and the +second field is an integer or a vector of integer values with the same +number of elements as the argument. + +Semantics: +"""""""""" + +This intrinsic splits a floating point value into a normalized +fractional component and integral exponent. + +For a non-zero argument, returns the argument multiplied by some power +of two such that the absolute value of the returned value is in the +range [0.5, 1.0), with the same sign as the argument. The second +result is an integer such that the first result multiplied by two raised +to the power of the second result is the input argument. + +If the argument is a zero, returns a zero with the same sign and a 0 +exponent. + +If the argument is a NaN, a NaN is returned and the returned exponent +is unspecified. 
+ +If the argument is an infinity, returns an infinity with the same sign +and an unspecified exponent. + '``llvm.log.*``' Intrinsic ^^^^^^^^^^^^^^^^^^^^^^^^^^ Index: llvm/docs/ReleaseNotes.rst =================================================================== --- llvm/docs/ReleaseNotes.rst +++ llvm/docs/ReleaseNotes.rst @@ -63,6 +63,8 @@ * Introduced new ``llvm.ldexp`` and ``llvm.experimental.constrained.ldexp`` intrinsics. +* Introduced new ``llvm.frexp`` intrinsic. + * The constant expression variants of the following instructions have been removed: Index: llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h =================================================================== --- llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -1832,6 +1832,13 @@ return buildInstr(TargetOpcode::G_FLDEXP, {Dst}, {Src0, Src1}, Flags); } + /// Build and insert \p Fract, \p Exp = G_FFREXP \p Src + MachineInstrBuilder + buildFFrexp(const DstOp &Fract, const DstOp &Exp, const SrcOp &Src, + std::optional Flags = std::nullopt) { + return buildInstr(TargetOpcode::G_FFREXP, {Fract, Exp}, {Src}, Flags); + } + /// Build and insert \p Res = G_FCOPYSIGN \p Op0, \p Op1 MachineInstrBuilder buildFCopysign(const DstOp &Dst, const SrcOp &Src0, const SrcOp &Src1) { Index: llvm/include/llvm/CodeGen/ISDOpcodes.h =================================================================== --- llvm/include/llvm/CodeGen/ISDOpcodes.h +++ llvm/include/llvm/CodeGen/ISDOpcodes.h @@ -931,6 +931,12 @@ FPOWI, /// FLDEXP - ldexp, inspired by libm (op0 * 2**op1). FLDEXP, + + /// FFREXP - frexp, extract fractional and exponent component of a + /// floating-point value. Returns the two components as separate return + /// values. 
+ FFREXP, + FLOG, FLOG2, FLOG10, Index: llvm/include/llvm/CodeGen/RuntimeLibcalls.h =================================================================== --- llvm/include/llvm/CodeGen/RuntimeLibcalls.h +++ llvm/include/llvm/CodeGen/RuntimeLibcalls.h @@ -74,6 +74,10 @@ /// UNKNOWN_LIBCALL if there is none. Libcall getLDEXP(EVT RetVT); + /// getFREXP - Return the FREXP_* value for the given types, or + /// UNKNOWN_LIBCALL if there is none. + Libcall getFREXP(EVT RetVT); + /// Return the SYNC_FETCH_AND_* value for the given opcode and type, or /// UNKNOWN_LIBCALL if there is none. Libcall getSYNC(unsigned Opc, MVT VT); Index: llvm/include/llvm/IR/Intrinsics.td =================================================================== --- llvm/include/llvm/IR/Intrinsics.td +++ llvm/include/llvm/IR/Intrinsics.td @@ -1041,6 +1041,9 @@ // TODO: int operand should be constrained to same number of elements as the result. def int_ldexp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, llvm_anyint_ty]>; + + // TODO: Should constrain all element counts to match + def int_frexp : DefaultAttrsIntrinsic<[llvm_anyfloat_ty, llvm_anyint_ty], [LLVMMatchType<0>]>; } def int_minnum : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], Index: llvm/include/llvm/IR/RuntimeLibcalls.def =================================================================== --- llvm/include/llvm/IR/RuntimeLibcalls.def +++ llvm/include/llvm/IR/RuntimeLibcalls.def @@ -284,6 +284,11 @@ HANDLE_LIBCALL(LDEXP_F80, "ldexpl") HANDLE_LIBCALL(LDEXP_F128, "ldexpl") HANDLE_LIBCALL(LDEXP_PPCF128, "ldexpl") +HANDLE_LIBCALL(FREXP_F32, "frexpf") +HANDLE_LIBCALL(FREXP_F64, "frexp") +HANDLE_LIBCALL(FREXP_F80, "frexpl") +HANDLE_LIBCALL(FREXP_F128, "frexpl") +HANDLE_LIBCALL(FREXP_PPCF128, "frexpl") // Floating point environment HANDLE_LIBCALL(FEGETENV, "fegetenv") Index: llvm/include/llvm/Support/TargetOpcodes.def =================================================================== --- llvm/include/llvm/Support/TargetOpcodes.def +++ 
llvm/include/llvm/Support/TargetOpcodes.def @@ -619,6 +619,9 @@ /// Floating point x * 2^n HANDLE_TARGET_OPCODE(G_FLDEXP) +/// Floating point extract fraction and exponent. +HANDLE_TARGET_OPCODE(G_FFREXP) + /// Generic FP negation. HANDLE_TARGET_OPCODE(G_FNEG) Index: llvm/include/llvm/Target/GenericOpcodes.td =================================================================== --- llvm/include/llvm/Target/GenericOpcodes.td +++ llvm/include/llvm/Target/GenericOpcodes.td @@ -930,6 +930,13 @@ let hasSideEffects = false; } +// Floating point frexp +def G_FFREXP : GenericInstruction { + let OutOperandList = (outs type0:$dst0, type1:$dst1); + let InOperandList = (ins type0:$src0); + let hasSideEffects = false; +} + // Floating point ceiling of a value. def G_FCEIL : GenericInstruction { let OutOperandList = (outs type0:$dst); Index: llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -2175,6 +2175,13 @@ getOrCreateVReg(*CI.getArgOperand(0)), MachineInstr::copyFlagsFromInstruction(CI)); return true; + case Intrinsic::frexp: { + ArrayRef VRegs = getOrCreateVRegs(CI); + MIRBuilder.buildFFrexp(VRegs[0], VRegs[1], + getOrCreateVReg(*CI.getArgOperand(0)), + MachineInstr::copyFlagsFromInstruction(CI)); + return true; + } case Intrinsic::memcpy_inline: return translateMemFunc(CI, MIRBuilder, TargetOpcode::G_MEMCPY_INLINE); case Intrinsic::memcpy: Index: llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp =================================================================== --- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -2584,6 +2584,19 @@ return UnableToLegalize; } + case TargetOpcode::G_FFREXP: { + Observer.changingInstr(MI); + + if (TypeIdx == 0) { + widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT); + widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC); + } 
else { + widenScalarDst(MI, WideTy, 1); + } + + Observer.changedInstr(MI); + return Legalized; + } case TargetOpcode::G_INTTOPTR: if (TypeIdx != 1) return UnableToLegalize; @@ -4235,6 +4248,7 @@ case G_STRICT_FMUL: case G_STRICT_FMA: case G_STRICT_FLDEXP: + case G_FFREXP: return fewerElementsVectorMultiEltType(GMI, NumElts); case G_ICMP: case G_FCMP: Index: llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -133,8 +133,11 @@ SDValue N1, SDValue N2, ArrayRef Mask) const; + SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, + TargetLowering::ArgListTy &&Args, bool isSigned); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); + void ExpandFrexpLibCall(SDNode *Node, SmallVectorImpl &Results); void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall LC, SmallVectorImpl &Results); void ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, @@ -173,6 +176,7 @@ SDValue ExpandFABS(SDNode *Node) const; SDValue ExpandFNEG(SDNode *Node) const; SDValue expandLdexp(SDNode *Node) const; + SDValue expandFrexp(SDNode *Node) const; SDValue ExpandLegalINT_TO_FP(SDNode *Node, SDValue &Chain); void PromoteLegalINT_TO_FP(SDNode *N, const SDLoc &dl, @@ -2028,18 +2032,8 @@ // by-reg argument. If it does fit into a single register, return the result // and leave the Hi part unset. 
SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, + TargetLowering::ArgListTy &&Args, bool isSigned) { - TargetLowering::ArgListTy Args; - TargetLowering::ArgListEntry Entry; - for (const SDValue &Op : Node->op_values()) { - EVT ArgVT = Op.getValueType(); - Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); - Entry.Node = Op; - Entry.Ty = ArgTy; - Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, isSigned); - Entry.IsZExt = !TLI.shouldSignExtendTypeInLibCall(ArgVT, isSigned); - Args.push_back(Entry); - } SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy(DAG.getDataLayout())); @@ -2085,6 +2079,62 @@ return CallInfo.first; } +SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, + bool isSigned) { + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (const SDValue &Op : Node->op_values()) { + EVT ArgVT = Op.getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Op; + Entry.Ty = ArgTy; + Entry.IsSExt = TLI.shouldSignExtendTypeInLibCall(ArgVT, isSigned); + Entry.IsZExt = !Entry.IsSExt; + Args.push_back(Entry); + } + + return ExpandLibCall(LC, Node, std::move(Args), isSigned); +} + +void SelectionDAGLegalize::ExpandFrexpLibCall( + SDNode *Node, SmallVectorImpl &Results) { + SDLoc dl(Node); + EVT VT = Node->getValueType(0); + EVT ExpVT = Node->getValueType(1); + + SDValue FPOp = Node->getOperand(0); + + EVT ArgVT = FPOp.getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + + TargetLowering::ArgListEntry FPArgEntry; + FPArgEntry.Node = FPOp; + FPArgEntry.Ty = ArgTy; + + SDValue StackSlot = DAG.CreateStackTemporary(ExpVT); + TargetLowering::ArgListEntry PtrArgEntry; + PtrArgEntry.Node = StackSlot; + PtrArgEntry.Ty = PointerType::get(*DAG.getContext(), + DAG.getDataLayout().getAllocaAddrSpace()); + + TargetLowering::ArgListTy Args = {FPArgEntry, PtrArgEntry}; + + RTLIB::Libcall LC = RTLIB::getFREXP(VT); + 
SDValue Call = ExpandLibCall(LC, Node, std::move(Args), false); + + Results.push_back(Call); + + // FIXME: Get type of int for libcall declaration and cast + + int FrameIdx = cast(StackSlot)->getIndex(); + auto PtrInfo = + MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FrameIdx); + + SDValue LoadExp = + DAG.getLoad(ExpVT, dl, Call.getValue(1), StackSlot, PtrInfo); + Results.push_back(LoadExp); +} + void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall LC, SmallVectorImpl &Results) { @@ -2429,6 +2479,125 @@ return DAG.getNode(ISD::FMUL, dl, VT, NewX, AsFP); } +SDValue SelectionDAGLegalize::expandFrexp(SDNode *Node) const { + SDLoc dl(Node); + SDValue Val = Node->getOperand(0); + EVT VT = Val.getValueType(); + EVT ExpVT = Node->getValueType(1); + EVT AsIntVT = VT.changeTypeToInteger(); + if (AsIntVT == EVT()) // TODO: How to handle f80? + return SDValue(); + + const fltSemantics &FltSem = SelectionDAG::EVTToAPFloatSemantics(VT); + const APFloat::ExponentType MinExpVal = APFloat::semanticsMinExponent(FltSem); + const unsigned Precision = APFloat::semanticsPrecision(FltSem); + const unsigned BitSize = VT.getScalarSizeInBits(); + + // TODO: Could introduce control flow and skip over the denormal handling. + + // scale_up = fmul value, scalbn(1.0, precision + 1) + // extracted_exp = (bitcast value to uint) >> precision - 1 + // biased_exp = extracted_exp + min_exp + // extracted_fract = (bitcast value to uint) & (fract_mask | sign_mask) + // + // is_denormal = val < smallest_normalized + // computed_fract = is_denormal ? scale_up : extracted_fract + // computed_exp = is_denormal ? biased_exp + (-precision - 1) : biased_exp + // + // result_0 = (!isfinite(val) || iszero(val)) ? val : computed_fract + // result_1 = (!isfinite(val) || iszero(val)) ? 
0 : computed_exp + + SDValue NegSmallestNormalizedInt = DAG.getConstant( + APFloat::getSmallestNormalized(FltSem, true).bitcastToAPInt(), dl, + AsIntVT); + + SDValue SmallestNormalizedInt = DAG.getConstant( + APFloat::getSmallestNormalized(FltSem, false).bitcastToAPInt(), dl, + AsIntVT); + + // Masks out the exponent bits. + SDValue ExpMask = + DAG.getConstant(APFloat::getInf(FltSem).bitcastToAPInt(), dl, AsIntVT); + + // Mask out the exponent part of the value. + // + // e.g, for f32 FractSignMaskVal = 0x807fffff + APInt FractSignMaskVal = APInt::getBitsSet(BitSize, 0, Precision - 1); + FractSignMaskVal.setBit(BitSize - 1); // Set the sign bit + + APInt SignMaskVal = APInt::getSignedMaxValue(BitSize); + SDValue SignMask = DAG.getConstant(SignMaskVal, dl, AsIntVT); + + SDValue FractSignMask = DAG.getConstant(FractSignMaskVal, dl, AsIntVT); + + const APFloat One(FltSem, "1.0"); + // Scale a possible denormal input. + // e.g., for f64, 0x1p+54 + APFloat ScaleUpKVal = + scalbn(One, Precision + 1, APFloat::rmNearestTiesToEven); + + SDValue ScaleUpK = DAG.getConstantFP(ScaleUpKVal, dl, VT); + SDValue ScaleUp = DAG.getNode(ISD::FMUL, dl, VT, Val, ScaleUpK); + + EVT SetCCVT = + TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); + + SDValue AsInt = DAG.getNode(ISD::BITCAST, dl, AsIntVT, Val); + + SDValue Abs = DAG.getNode(ISD::AND, dl, AsIntVT, AsInt, SignMask); + + SDValue AddNegSmallestNormal = + DAG.getNode(ISD::ADD, dl, AsIntVT, Abs, NegSmallestNormalizedInt); + SDValue DenormOrZero = DAG.getSetCC(dl, SetCCVT, AddNegSmallestNormal, + NegSmallestNormalizedInt, ISD::SETULE); + + SDValue IsDenormal = + DAG.getSetCC(dl, SetCCVT, Abs, SmallestNormalizedInt, ISD::SETULT); + + SDValue MinExp = DAG.getConstant(MinExpVal, dl, ExpVT); + SDValue Zero = DAG.getConstant(0, dl, ExpVT); + + SDValue ScaledAsInt = DAG.getNode(ISD::BITCAST, dl, AsIntVT, ScaleUp); + SDValue ScaledSelect = + DAG.getNode(ISD::SELECT, dl, AsIntVT, IsDenormal, ScaledAsInt, AsInt); + + 
SDValue ExpMaskScaled = + DAG.getNode(ISD::AND, dl, AsIntVT, ScaledAsInt, ExpMask); + + SDValue ScaledValue = + DAG.getNode(ISD::SELECT, dl, AsIntVT, IsDenormal, ExpMaskScaled, Abs); + + // Extract the exponent bits. + SDValue ExponentShiftAmt = + DAG.getShiftAmountConstant(Precision - 1, AsIntVT, dl); + SDValue ShiftedExp = + DAG.getNode(ISD::SRL, dl, AsIntVT, ScaledValue, ExponentShiftAmt); + SDValue Exp = DAG.getSExtOrTrunc(ShiftedExp, dl, ExpVT); + + SDValue NormalBiasedExp = DAG.getNode(ISD::ADD, dl, ExpVT, Exp, MinExp); + SDValue DenormalOffset = DAG.getConstant(-Precision - 1, dl, ExpVT); + SDValue DenormalExpBias = + DAG.getNode(ISD::SELECT, dl, ExpVT, IsDenormal, DenormalOffset, Zero); + + SDValue MaskedFractAsInt = + DAG.getNode(ISD::AND, dl, AsIntVT, ScaledSelect, FractSignMask); + const APFloat Half(FltSem, "0.5"); + SDValue FPHalf = DAG.getConstant(Half.bitcastToAPInt(), dl, AsIntVT); + SDValue Or = DAG.getNode(ISD::OR, dl, AsIntVT, MaskedFractAsInt, FPHalf); + SDValue MaskedFract = DAG.getNode(ISD::BITCAST, dl, VT, Or); + + SDValue ComputedExp = + DAG.getNode(ISD::ADD, dl, ExpVT, NormalBiasedExp, DenormalExpBias); + + SDValue Result0 = + DAG.getNode(ISD::SELECT, dl, VT, DenormOrZero, Val, MaskedFract); + + SDValue Result1 = + DAG.getNode(ISD::SELECT, dl, ExpVT, DenormOrZero, Zero, ComputedExp); + + return DAG.getMergeValues({Result0, Result1}, dl); +} + /// This function is responsible for legalizing a /// INT_TO_FP operation of the specified operand when the target requests that /// we expand it. At this point, we know that the result and operand types are @@ -3383,6 +3552,19 @@ break; } + case ISD::FFREXP: { + RTLIB::Libcall LC = RTLIB::getFREXP(Node->getValueType(0)); + // Use the LibCall instead, it is very likely faster + // FIXME: Use separate LibCall action. 
+ if (TLI.getLibcallName(LC)) + break; + + if (SDValue Expanded = expandFrexp(Node)) { + Results.push_back(Expanded); + Results.push_back(Expanded.getValue(1)); + } + break; + } case ISD::FMAD: llvm_unreachable("Illegal fmad should never be formed"); @@ -4285,6 +4467,10 @@ ExpandFPLibCall(Node, RTLIB::LDEXP_F32, RTLIB::LDEXP_F64, RTLIB::LDEXP_F80, RTLIB::LDEXP_F128, RTLIB::LDEXP_PPCF128, Results); break; + case ISD::FFREXP: { + ExpandFrexpLibCall(Node, Results); + break; + } case ISD::FPOWI: case ISD::STRICT_FPOWI: { RTLIB::Libcall LC = RTLIB::getPOWI(Node->getSimpleValueType(0)); @@ -5043,6 +5229,17 @@ Results.push_back(Tmp3); Results.push_back(Tmp3.getValue(1)); break; + case ISD::FFREXP: { + Tmp1 = DAG.getNode(ISD::FP_EXTEND, dl, NVT, Node->getOperand(0)); + Tmp2 = DAG.getNode(ISD::FFREXP, dl, {NVT, Node->getValueType(1)}, Tmp1); + + Results.push_back( + DAG.getNode(ISD::FP_ROUND, dl, OVT, Tmp2, + DAG.getIntPtrConstant(0, dl, /*isTarget=*/true))); + + Results.push_back(Tmp2.getValue(1)); + break; + } case ISD::FFLOOR: case ISD::FCEIL: case ISD::FRINT: Index: llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -2327,6 +2327,7 @@ case ISD::FPOWI: case ISD::FLDEXP: R = PromoteFloatRes_ExpOp(N); break; + case ISD::FFREXP: R = PromoteFloatRes_FFREXP(N); break; case ISD::FP_ROUND: R = PromoteFloatRes_FP_ROUND(N); break; case ISD::LOAD: R = PromoteFloatRes_LOAD(N); break; @@ -2506,6 +2507,17 @@ return DAG.getNode(N->getOpcode(), SDLoc(N), NVT, Op0, Op1); } +SDValue DAGTypeLegalizer::PromoteFloatRes_FFREXP(SDNode *N) { + EVT VT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + SDValue Op = GetPromotedFloat(N->getOperand(0)); + SDValue Res = + DAG.getNode(N->getOpcode(), SDLoc(N), {NVT, N->getValueType(1)}, Op); + + ReplaceValueWith(SDValue(N, 1), 
Res.getValue(1)); + return Res; +} + // Explicit operation to reduce precision. Reduce the value to half precision // and promote it back to the legal type. SDValue DAGTypeLegalizer::PromoteFloatRes_FP_ROUND(SDNode *N) { Index: llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -288,6 +288,9 @@ case ISD::IS_FPCLASS: Res = PromoteIntRes_IS_FPCLASS(N); break; + case ISD::FFREXP: + Res = PromoteIntRes_FFREXP(N); + break; } // If the result is null then the sub-method took care of registering it. @@ -1215,6 +1218,18 @@ return DAG.getNode(ISD::IS_FPCLASS, DL, NResVT, Arg, Test); } +SDValue DAGTypeLegalizer::PromoteIntRes_FFREXP(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1)); + EVT VT = N->getValueType(0); + + SDLoc dl(N); + SDValue Res = + DAG.getNode(N->getOpcode(), dl, DAG.getVTList(VT, NVT), N->getOperand(0)); + + ReplaceValueWith(SDValue(N, 0), Res); + return Res.getValue(1); +} + SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { SDValue LHS = GetPromotedInteger(N->getOperand(0)); SDValue RHS = N->getOperand(1); Index: llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -333,6 +333,7 @@ SDValue PromoteIntRes_MLOAD(MaskedLoadSDNode *N); SDValue PromoteIntRes_MGATHER(MaskedGatherSDNode *N); SDValue PromoteIntRes_Overflow(SDNode *N); + SDValue PromoteIntRes_FFREXP(SDNode *N); SDValue PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo); SDValue PromoteIntRes_Select(SDNode *N); SDValue PromoteIntRes_SELECT_CC(SDNode *N); @@ -694,6 +695,7 @@ SDValue PromoteFloatRes_FCOPYSIGN(SDNode *N); SDValue PromoteFloatRes_FMAD(SDNode *N); SDValue PromoteFloatRes_ExpOp(SDNode *N); + 
SDValue PromoteFloatRes_FFREXP(SDNode *N); SDValue PromoteFloatRes_FP_ROUND(SDNode *N); SDValue PromoteFloatRes_LOAD(SDNode *N); SDValue PromoteFloatRes_SELECT(SDNode *N); @@ -801,6 +803,7 @@ SDValue ScalarizeVecRes_IS_FPCLASS(SDNode *N); SDValue ScalarizeVecRes_FIX(SDNode *N); + SDValue ScalarizeVecRes_FFREXP(SDNode *N, unsigned ResNo); // Vector Operand Scalarization: <1 x ty> -> ty. bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); @@ -849,6 +852,7 @@ void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_FFREXP(SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi); void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi); Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -412,6 +412,7 @@ case ISD::SMULO: case ISD::UMULO: case ISD::FCANONICALIZE: + case ISD::FFREXP: case ISD::SADDSAT: case ISD::UADDSAT: case ISD::SSUBSAT: @@ -979,7 +980,9 @@ return; } - Results.push_back(DAG.UnrollVectorOp(Node)); + SDValue Unrolled = DAG.UnrollVectorOp(Node); + for (unsigned I = 0, E = Unrolled->getNumValues(); I != E; ++I) + Results.push_back(Unrolled.getValue(I)); } SDValue VectorLegalizer::ExpandSELECT(SDNode *Node) { Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -113,7 +113,9 @@ case ISD::FCANONICALIZE: R = ScalarizeVecRes_UnaryOp(N); break; - + case ISD::FFREXP: + R = ScalarizeVecRes_FFREXP(N, 
ResNo); + break; case ISD::ADD: case ISD::AND: case ISD::FADD: @@ -222,6 +224,34 @@ Op2, N->getFlags()); } +SDValue DAGTypeLegalizer::ScalarizeVecRes_FFREXP(SDNode *N, unsigned ResNo) { + assert(N->getValueType(0).getVectorNumElements() == 1 && + "Unexpected vector type!"); + SDValue Elt = GetScalarizedVector(N->getOperand(0)); + + EVT VT0 = N->getValueType(0); + EVT VT1 = N->getValueType(1); + SDLoc dl(N); + + SDNode *ScalarNode = + DAG.getNode(N->getOpcode(), dl, + {VT0.getScalarType(), VT1.getScalarType()}, Elt) + .getNode(); + + // Replace the other vector result not being explicitly scalarized here. + unsigned OtherNo = 1 - ResNo; + EVT OtherVT = N->getValueType(OtherNo); + if (getTypeAction(OtherVT) == TargetLowering::TypeScalarizeVector) { + SetScalarizedVector(SDValue(N, OtherNo), SDValue(ScalarNode, OtherNo)); + } else { + SDValue OtherVal = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, OtherVT, + SDValue(ScalarNode, OtherNo)); + ReplaceValueWith(SDValue(N, OtherNo), OtherVal); + } + + return SDValue(ScalarNode, ResNo); +} + SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) { EVT VT = N->getValueType(0).getVectorElementType(); unsigned NumOpers = N->getNumOperands(); @@ -1080,6 +1110,9 @@ case ISD::FCANONICALIZE: SplitVecRes_UnaryOp(N, Lo, Hi); break; + case ISD::FFREXP: + SplitVecRes_FFREXP(N, ResNo, Lo, Hi); + break; case ISD::ANY_EXTEND: case ISD::SIGN_EXTEND: @@ -2294,6 +2327,45 @@ Hi = DAG.getNode(Opcode, dl, HiVT, {Hi, MaskHi, EVLHi}, Flags); } +void DAGTypeLegalizer::SplitVecRes_FFREXP(SDNode *N, unsigned ResNo, + SDValue &Lo, SDValue &Hi) { + // Get the dest types - they may not match the input types, e.g. int_to_fp. + EVT LoVT, HiVT; + EVT LoVT1, HiVT1; + SDLoc dl(N); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + std::tie(LoVT1, HiVT1) = DAG.GetSplitDestVTs(N->getValueType(1)); + + // If the input also splits, handle it directly for a compile time speedup. + // Otherwise split it by hand. 
+ EVT InVT = N->getOperand(0).getValueType(); + if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) + GetSplitVector(N->getOperand(0), Lo, Hi); + else + std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); + + Lo = DAG.getNode(N->getOpcode(), dl, {LoVT, LoVT1}, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, {HiVT, HiVT1}, Hi); + Lo->setFlags(N->getFlags()); + Hi->setFlags(N->getFlags()); + + SDNode *HiNode = Hi.getNode(); + SDNode *LoNode = Lo.getNode(); + + // Replace the other vector result not being explicitly split here. + unsigned OtherNo = 1 - ResNo; + EVT OtherVT = N->getValueType(OtherNo); + if (getTypeAction(OtherVT) == TargetLowering::TypeSplitVector) { + SetSplitVector(SDValue(N, OtherNo), SDValue(LoNode, OtherNo), + SDValue(HiNode, OtherNo)); + } else { + SDValue OtherVal = + DAG.getNode(ISD::CONCAT_VECTORS, dl, OtherVT, SDValue(LoNode, OtherNo), + SDValue(HiNode, OtherNo)); + ReplaceValueWith(SDValue(N, OtherNo), OtherVal); + } +} + void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -11636,16 +11636,11 @@ } SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { - assert(N->getNumValues() == 1 && - "Can't unroll a vector with multiple results!"); - EVT VT = N->getValueType(0); - unsigned NE = VT.getVectorNumElements(); EVT EltVT = VT.getVectorElementType(); - SDLoc dl(N); + unsigned NE = VT.getVectorNumElements(); - SmallVector Scalars; - SmallVector Operands(N->getNumOperands()); + SDLoc dl(N); // If ResNE is 0, fully unroll the vector op. 
if (ResNE == 0) @@ -11653,6 +11648,40 @@ else if (NE > ResNE) NE = ResNE; + if (N->getNumValues() == 2) { + SmallVector Scalars0, Scalars1; + SmallVector Operands(N->getNumOperands()); + EVT VT1 = N->getValueType(1); + EVT EltVT1 = VT1.getVectorElementType(); + + unsigned i; + for (i = 0; i != NE; ++i) { + for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) { + SDValue Operand = N->getOperand(j); + EVT OperandVT = Operand.getValueType(); + + // A vector operand; extract a single element. + EVT OperandEltVT = OperandVT.getVectorElementType(); + Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, + Operand, getVectorIdxConstant(i, dl)); + } + + SDValue EltOp = getNode(N->getOpcode(), dl, {EltVT, EltVT1}, Operands); + Scalars0.push_back(EltOp); + Scalars1.push_back(EltOp.getValue(1)); + } + + SDValue Vec0 = getBuildVector(VT, dl, Scalars0); + SDValue Vec1 = getBuildVector(VT1, dl, Scalars1); + return getMergeValues({Vec0, Vec1}, dl); + } + + assert(N->getNumValues() == 1 && + "Can't unroll a vector with multiple results!"); + + SmallVector Scalars; + SmallVector Operands(N->getNumOperands()); + unsigned i; for (i= 0; i != NE; ++i) { for (unsigned j = 0, e = N->getNumOperands(); j != e; ++j) { Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -6468,6 +6468,14 @@ getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), Flags)); return; + case Intrinsic::frexp: { + SmallVector ValueVTs; + ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs); + SDVTList VTs = DAG.getVTList(ValueVTs); + setValue(&I, + DAG.getNode(ISD::FFREXP, sdl, VTs, getValue(I.getArgOperand(0)))); + return; + } case Intrinsic::arithmetic_fence: { setValue(&I, DAG.getNode(ISD::ARITH_FENCE, sdl, getValue(I.getArgOperand(0)).getValueType(), Index: 
llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp =================================================================== --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -285,6 +285,7 @@ case ISD::FLDEXP: return "fldexp"; case ISD::STRICT_FLDEXP: return "strict_fldexp"; + case ISD::FFREXP: return "ffrexp"; case ISD::FPOWI: return "fpowi"; case ISD::STRICT_FPOWI: return "strict_fpowi"; case ISD::SETCC: return "setcc"; Index: llvm/lib/CodeGen/TargetLoweringBase.cpp =================================================================== --- llvm/lib/CodeGen/TargetLoweringBase.cpp +++ llvm/lib/CodeGen/TargetLoweringBase.cpp @@ -215,6 +215,11 @@ setLibcallName(RTLIB::LDEXP_F80, nullptr); setLibcallName(RTLIB::LDEXP_F128, nullptr); setLibcallName(RTLIB::LDEXP_PPCF128, nullptr); + + setLibcallName(RTLIB::FREXP_F32, nullptr); + setLibcallName(RTLIB::FREXP_F80, nullptr); + setLibcallName(RTLIB::FREXP_F128, nullptr); + setLibcallName(RTLIB::FREXP_PPCF128, nullptr); } } @@ -510,6 +515,11 @@ LDEXP_PPCF128); } +RTLIB::Libcall RTLIB::getFREXP(EVT RetVT) { + return getFPLibCall(RetVT, FREXP_F32, FREXP_F64, FREXP_F80, FREXP_F128, + FREXP_PPCF128); +} + RTLIB::Libcall RTLIB::getOUTLINE_ATOMIC(unsigned Opc, AtomicOrdering Order, MVT VT) { unsigned ModeN, ModelN; @@ -857,8 +867,9 @@ setOperationAction({ISD::BITREVERSE, ISD::PARITY}, VT, Expand); // These library functions default to expand. - setOperationAction({ISD::FROUND, ISD::FROUNDEVEN, ISD::FPOWI, ISD::FLDEXP}, - VT, Expand); + setOperationAction( + {ISD::FROUND, ISD::FROUNDEVEN, ISD::FPOWI, ISD::FLDEXP, ISD::FFREXP}, + VT, Expand); // These operations default to expand for vector types. 
if (VT.isVector()) Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.h @@ -139,6 +139,8 @@ MachineIRBuilder &B) const; bool legalizeFDIV64(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; + bool legalizeFFREXP(MachineInstr &MI, MachineRegisterInfo &MRI, + MachineIRBuilder &B) const; bool legalizeFastUnsafeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const; bool legalizeFastUnsafeFDIV64(MachineInstr &MI, MachineRegisterInfo &MRI, Index: llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp =================================================================== --- llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -918,6 +918,11 @@ .maxScalarIf(typeIs(0, S16), 1, S16) .clampScalar(1, S32, S32) .lower(); + + getActionDefinitionsBuilder(G_FFREXP) + .customFor({{S32, S32}, {S64, S32}, {S16, S16}, {S16, S32}}) + .scalarize(0) + .lower(); } else { getActionDefinitionsBuilder(G_FSQRT) .legalFor({S32, S64}) @@ -943,6 +948,13 @@ .clampScalar(0, S32, S64) .clampScalar(1, S32, S32) .lower(); + + getActionDefinitionsBuilder(G_FFREXP) + .customFor({{S32, S32}, {S64, S32}}) + .scalarize(0) + .minScalar(0, S32) + .clampScalar(1, S32, S32) + .lower(); } getActionDefinitionsBuilder(G_FPTRUNC) @@ -1976,6 +1988,8 @@ return legalizeFMad(MI, MRI, B); case TargetOpcode::G_FDIV: return legalizeFDIV(MI, MRI, B); + case TargetOpcode::G_FFREXP: + return legalizeFFREXP(MI, MRI, B); case TargetOpcode::G_UDIV: case TargetOpcode::G_UREM: case TargetOpcode::G_UDIVREM: @@ -4299,6 +4313,41 @@ return true; } +bool AMDGPULegalizerInfo::legalizeFFREXP(MachineInstr &MI, + MachineRegisterInfo &MRI, + MachineIRBuilder &B) const { /* Custom legalization of G_FFREXP: rebuilds MI from the amdgcn.frexp.mant and amdgcn.frexp.exp intrinsics, erases MI and returns true. */ + Register Res0 = MI.getOperand(0).getReg(); /* fraction result */ + Register Res1 = MI.getOperand(1).getReg(); /* exponent result */ + Register Val = 
MI.getOperand(2).getReg(); /* value to decompose */ + uint16_t Flags = MI.getFlags(); + /* The exponent intrinsic is built with a 16-bit result when the fraction type is a 16-bit scalar, and with a 32-bit result otherwise. */ + LLT Ty = MRI.getType(Res0); + LLT InstrExpTy = Ty == LLT::scalar(16) ? LLT::scalar(16) : LLT::scalar(32); + /* Both intrinsics take Val as their only use operand and inherit the flags of MI. */ + auto Mant = B.buildIntrinsic(Intrinsic::amdgcn_frexp_mant, {Ty}, false) + .addUse(Val) + .setMIFlags(Flags); + auto Exp = B.buildIntrinsic(Intrinsic::amdgcn_frexp_exp, {InstrExpTy}, false) + .addUse(Val) + .setMIFlags(Flags); + /* When ST.hasFractBug() is set, keep the intrinsic results only if |Val| compares ordered-less-than +infinity; otherwise (infinity or NaN) the fraction becomes Val itself and the exponent becomes 0. */ + if (ST.hasFractBug()) { + auto Fabs = B.buildFAbs(Ty, Val); + auto Inf = B.buildFConstant(Ty, APFloat::getInf(getFltSemanticForLLT(Ty))); + auto IsFinite = + B.buildFCmp(CmpInst::FCMP_OLT, LLT::scalar(1), Fabs, Inf, Flags); + auto Zero = B.buildConstant(InstrExpTy, 0); + Exp = B.buildSelect(InstrExpTy, IsFinite, Exp, Zero); + Mant = B.buildSelect(Ty, IsFinite, Mant, Val); + } + /* Define the original result registers; the exponent is sign-extended or truncated from InstrExpTy to the type of Res1. */ + B.buildCopy(Res0, Mant); + B.buildSExtOrTrunc(Res1, Exp); + + MI.eraseFromParent(); + return true; +} + bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { Index: llvm/lib/Target/AMDGPU/SIISelLowering.h =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.h +++ llvm/lib/Target/AMDGPU/SIISelLowering.h @@ -106,6 +106,7 @@ SDValue LowerFDIV32(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFDIV64(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFDIV(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFFREXP(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const; Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp =================================================================== --- llvm/lib/Target/AMDGPU/SIISelLowering.cpp +++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp @@ -474,6 +474,7 @@ setOperationAction(ISD::FFLOOR, MVT::f64, Legal); setOperationAction({ISD::FLDEXP, ISD::STRICT_FLDEXP}, {MVT::f32, 
MVT::f64}, Legal); + setOperationAction(ISD::FFREXP, {MVT::f32, MVT::f64}, Custom); setOperationAction({ISD::FSIN, ISD::FCOS, ISD::FDIV}, MVT::f32, Custom); setOperationAction(ISD::FDIV, MVT::f64, Custom); @@ -531,6 +532,7 @@ // F16 - VOP2 Actions. setOperationAction({ISD::BR_CC, ISD::SELECT_CC}, MVT::f16, Expand); setOperationAction({ISD::FLDEXP, ISD::STRICT_FLDEXP}, MVT::f16, Custom); + setOperationAction(ISD::FFREXP, MVT::f16, Custom); setOperationAction(ISD::FDIV, MVT::f16, Custom); // F16 - VOP3 Actions. @@ -4824,6 +4826,7 @@ return LowerTrig(Op, DAG); case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::FDIV: return LowerFDIV(Op, DAG); + case ISD::FFREXP: return LowerFFREXP(Op, DAG); case ISD::ATOMIC_CMP_SWAP: return LowerATOMIC_CMP_SWAP(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::GlobalAddress: { @@ -9526,6 +9529,38 @@ llvm_unreachable("Unexpected type for fdiv"); } +SDValue SITargetLowering::LowerFFREXP(SDValue Op, SelectionDAG &DAG) const { /* Custom lowering of ISD::FFREXP: builds the fraction and exponent from the amdgcn.frexp.mant and amdgcn.frexp.exp intrinsics and returns both as merged values. */ + SDLoc dl(Op); + SDValue Val = Op.getOperand(0); + EVT VT = Val.getValueType(); + EVT ResultExpVT = Op->getValueType(1); + EVT InstrExpVT = VT == MVT::f16 ? 
MVT::i16 : MVT::i32; + /* InstrExpVT is the type the exponent intrinsic node is created with (i16 for f16 inputs, i32 otherwise); ResultExpVT is the type of the node's second result. Both intrinsics read the same input value. */ + SDValue Mant = DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getTargetConstant(Intrinsic::amdgcn_frexp_mant, dl, MVT::i32), Val); + + SDValue Exp = DAG.getNode( + ISD::INTRINSIC_WO_CHAIN, dl, InstrExpVT, + DAG.getTargetConstant(Intrinsic::amdgcn_frexp_exp, dl, MVT::i32), Val); + /* When the subtarget reports hasFractBug(), keep the intrinsic results only if |Val| compares ordered-less-than +infinity; otherwise (infinity or NaN) the fraction becomes Val itself and the exponent becomes 0. */ + const GCNSubtarget &ST = + DAG.getMachineFunction().getSubtarget<GCNSubtarget>(); + if (ST.hasFractBug()) { + SDValue Fabs = DAG.getNode(ISD::FABS, dl, VT, Val); + SDValue Inf = DAG.getConstantFP( + APFloat::getInf(SelectionDAG::EVTToAPFloatSemantics(VT)), dl, VT); + /* SETOLT is an ordered compare, so IsFinite is false for infinities and NaNs. */ + SDValue IsFinite = DAG.getSetCC(dl, MVT::i1, Fabs, Inf, ISD::SETOLT); + SDValue Zero = DAG.getConstant(0, dl, InstrExpVT); + Exp = DAG.getNode(ISD::SELECT, dl, InstrExpVT, IsFinite, Exp, Zero); + Mant = DAG.getNode(ISD::SELECT, dl, VT, IsFinite, Mant, Val); + } + /* Sign-extend or truncate the exponent from InstrExpVT to ResultExpVT before merging it with the fraction. */ + SDValue CastExp = DAG.getSExtOrTrunc(Exp, dl, ResultExpVT); + return DAG.getMergeValues({Mant, CastExp}, dl); +} + SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); StoreSDNode *Store = cast<StoreSDNode>(Op); Index: llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir =================================================================== --- llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -479,6 +479,9 @@ # DEBUG-NEXT: G_FLDEXP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices # DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined # DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined +# DEBUG-NEXT: G_FFREXP (opcode {{[0-9]+}}): 2 type indices, 0 imm indices +# DEBUG-NEXT:.. type index coverage check SKIPPED: no rules defined +# DEBUG-NEXT:.. imm index coverage check SKIPPED: no rules defined # DEBUG-NEXT: G_FNEG (opcode {{[0-9]+}}): 1 type index, 0 imm indices # DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. 
the first uncovered type index: 1, OK Index: llvm/test/CodeGen/AMDGPU/llvm.frexp.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/AMDGPU/llvm.frexp.ll @@ -0,0 +1,1216 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-SDAG %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-SDAG %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-SDAG %s +; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-SDAG %s + +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6,GFX6-GISEL %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,GFX8,GFX8-GISEL %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,GFX9-GISEL %s +; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11,GFX11-GISEL %s + +define { half, i32 } @test_frexp_f16_i32(half %a) { +; GFX6-SDAG-LABEL: test_frexp_f16_i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-SDAG-NEXT: v_frexp_mant_f32_e32 v1, v0 +; GFX6-SDAG-NEXT: v_frexp_exp_i32_f32_e32 v2, v0 +; GFX6-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_frexp_f16_i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_mant_f16_e32 v2, v0 +; GFX8-NEXT: 
v_frexp_exp_i16_f16_e32 v0, v0 +; GFX8-NEXT: v_bfe_i32 v1, v0, 0, 16 +; GFX8-NEXT: v_mov_b32_e32 v0, v2 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_frexp_f16_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_frexp_mant_f16_e32 v2, v0 +; GFX9-NEXT: v_frexp_exp_i16_f16_e32 v0, v0 +; GFX9-NEXT: v_bfe_i32 v1, v0, 0, 16 +; GFX9-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_frexp_f16_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_frexp_exp_i16_f16_e32 v1, v0 +; GFX11-NEXT: v_frexp_mant_f16_e32 v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_bfe_i32 v1, v1, 0, 16 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_frexp_f16_i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; GFX6-GISEL-NEXT: v_frexp_mant_f32_e32 v2, v0 +; GFX6-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; GFX6-GISEL-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call { half, i32 } @llvm.frexp.f16.i32(half %a) + ret { half, i32 } %result +} + +define half @test_frexp_f16_i32_only_use_fract(half %a) { +; GFX6-SDAG-LABEL: test_frexp_f16_i32_only_use_fract: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-SDAG-NEXT: v_frexp_mant_f32_e32 v1, v0 +; GFX6-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; 
GFX8-LABEL: test_frexp_f16_i32_only_use_fract: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_mant_f16_e32 v0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_frexp_f16_i32_only_use_fract: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_frexp_mant_f16_e32 v0, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_frexp_f16_i32_only_use_fract: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_frexp_mant_f16_e32 v0, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_frexp_f16_i32_only_use_fract: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; GFX6-GISEL-NEXT: v_frexp_mant_f32_e32 v2, v0 +; GFX6-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call { half, i32 } @llvm.frexp.f16.i32(half %a) + %result.0 = extractvalue { half, i32 } %result, 0 + ret half %result.0 +} + +define i32 @test_frexp_f16_i32_only_use_exp(half %a) { +; GFX6-SDAG-LABEL: test_frexp_f16_i32_only_use_exp: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-SDAG-NEXT: v_frexp_exp_i32_f32_e32 v1, v0 +; GFX6-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_frexp_f16_i32_only_use_exp: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_exp_i16_f16_e32 v0, v0 +; GFX8-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX8-NEXT: 
s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_frexp_f16_i32_only_use_exp: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_frexp_exp_i16_f16_e32 v0, v0 +; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_frexp_f16_i32_only_use_exp: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_frexp_exp_i16_f16_e32 v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_frexp_f16_i32_only_use_exp: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; GFX6-GISEL-NEXT: v_frexp_exp_i32_f32_e32 v2, v0 +; GFX6-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call { half, i32 } @llvm.frexp.f16.i32(half %a) + %result.0 = extractvalue { half, i32 } %result, 1 + ret i32 %result.0 +} + +define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) { +; GFX6-SDAG-LABEL: test_frexp_v2f16_v2i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v2, v0 +; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v3, v1 +; GFX6-SDAG-NEXT: v_frexp_mant_f32_e32 v0, v2 +; GFX6-SDAG-NEXT: v_frexp_mant_f32_e32 v1, v3 +; GFX6-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v2|, s4 +; GFX6-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v3|, s4 +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v2, v0, vcc +; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5] +; GFX6-SDAG-NEXT: v_frexp_exp_i32_f32_e32 v2, v2 +; GFX6-SDAG-NEXT: v_frexp_exp_i32_f32_e32 v3, v3 +; 
GFX6-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[4:5] +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: test_frexp_v2f16_v2i32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX8-SDAG-NEXT: v_frexp_mant_f16_e32 v1, v0 +; GFX8-SDAG-NEXT: v_frexp_mant_f16_sdwa v3, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; GFX8-SDAG-NEXT: v_or_b32_e32 v3, v1, v3 +; GFX8-SDAG-NEXT: v_frexp_exp_i16_f16_e32 v1, v2 +; GFX8-SDAG-NEXT: v_frexp_exp_i16_f16_e32 v0, v0 +; GFX8-SDAG-NEXT: v_bfe_i32 v2, v1, 0, 16 +; GFX8-SDAG-NEXT: v_bfe_i32 v1, v0, 0, 16 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, v3 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: test_frexp_v2f16_v2i32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX9-SDAG-NEXT: v_frexp_mant_f16_e32 v2, v1 +; GFX9-SDAG-NEXT: v_frexp_mant_f16_e32 v3, v0 +; GFX9-SDAG-NEXT: v_pack_b32_f16 v3, v3, v2 +; GFX9-SDAG-NEXT: v_frexp_exp_i16_f16_e32 v1, v1 +; GFX9-SDAG-NEXT: v_frexp_exp_i16_f16_e32 v0, v0 +; GFX9-SDAG-NEXT: v_bfe_i32 v2, v1, 0, 16 +; GFX9-SDAG-NEXT: v_bfe_i32 v1, v0, 0, 16 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, v3 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_frexp_v2f16_v2i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX11-NEXT: v_frexp_mant_f16_e32 v2, v0 +; GFX11-NEXT: v_frexp_exp_i16_f16_e32 v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_frexp_mant_f16_e32 v3, v1 +; GFX11-NEXT: v_frexp_exp_i16_f16_e32 v4, v1 +; GFX11-NEXT: v_bfe_i32 v1, v0, 0, 16 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) +; GFX11-NEXT: v_pack_b32_f16 v0, v2, v3 +; 
GFX11-NEXT: v_bfe_i32 v2, v4, 0, 16 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_frexp_v2f16_v2i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-GISEL-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-GISEL-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX6-GISEL-NEXT: v_frexp_exp_i32_f32_e32 v2, v0 +; GFX6-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX6-GISEL-NEXT: v_frexp_mant_f32_e32 v4, v1 +; GFX6-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-GISEL-NEXT: v_frexp_exp_i32_f32_e32 v3, v1 +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: test_frexp_v2f16_v2i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX8-GISEL-NEXT: v_frexp_mant_f16_e32 v3, v0 +; GFX8-GISEL-NEXT: v_frexp_exp_i16_f16_e32 v0, v0 +; GFX8-GISEL-NEXT: v_bfe_i32 v1, v0, 0, 16 +; GFX8-GISEL-NEXT: v_frexp_mant_f16_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD +; GFX8-GISEL-NEXT: v_frexp_exp_i16_f16_e32 v2, v2 +; GFX8-GISEL-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GFX8-GISEL-NEXT: v_or_b32_e32 v0, v3, v0 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: test_frexp_v2f16_v2i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 +; GFX9-GISEL-NEXT: v_frexp_mant_f16_e32 v3, v0 +; GFX9-GISEL-NEXT: v_frexp_exp_i16_f16_e32 v0, v0 +; GFX9-GISEL-NEXT: v_bfe_i32 v1, v0, 0, 16 +; GFX9-GISEL-NEXT: v_frexp_mant_f16_e32 v0, v2 +; GFX9-GISEL-NEXT: 
v_frexp_exp_i16_f16_e32 v2, v2 +; GFX9-GISEL-NEXT: v_bfe_i32 v2, v2, 0, 16 +; GFX9-GISEL-NEXT: v_pack_b32_f16 v0, v3, v0 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a) + ret { <2 x half>, <2 x i32> } %result +} + +define <2 x half> @test_frexp_v2f16_v2i32_only_use_fract(<2 x half> %a) { +; GFX6-SDAG-LABEL: test_frexp_v2f16_v2i32_only_use_fract: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-SDAG-NEXT: v_frexp_mant_f32_e32 v2, v0 +; GFX6-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-SDAG-NEXT: v_frexp_mant_f32_e32 v3, v1 +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_frexp_v2f16_v2i32_only_use_fract: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_mant_f16_e32 v1, v0 +; GFX8-NEXT: v_frexp_mant_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: test_frexp_v2f16_v2i32_only_use_fract: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_frexp_mant_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9-SDAG-NEXT: v_frexp_mant_f16_e32 v0, v0 +; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_frexp_v2f16_v2i32_only_use_fract: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 
+; GFX11-NEXT: v_frexp_mant_f16_e32 v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) +; GFX11-NEXT: v_frexp_mant_f16_e32 v1, v1 +; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_frexp_v2f16_v2i32_only_use_fract: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-GISEL-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-GISEL-NEXT: v_frexp_mant_f32_e32 v2, v0 +; GFX6-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-GISEL-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX6-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: test_frexp_v2f16_v2i32_only_use_fract: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_frexp_mant_f16_e32 v1, v0 +; GFX9-GISEL-NEXT: v_frexp_mant_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9-GISEL-NEXT: v_pack_b32_f16 v0, v1, v0 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a) + %result.0 = extractvalue { <2 x half>, <2 x i32> } %result, 0 + ret <2 x half> %result.0 +} + +define <2 x i32> @test_frexp_v2f16_v2i32_only_use_exp(<2 x half> %a) { +; GFX6-SDAG-LABEL: test_frexp_v2f16_v2i32_only_use_exp: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 +; GFX6-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-SDAG-NEXT: 
v_frexp_exp_i32_f32_e32 v2, v0 +; GFX6-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-SDAG-NEXT: v_frexp_exp_i32_f32_e32 v3, v1 +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX6-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: test_frexp_v2f16_v2i32_only_use_exp: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_frexp_exp_i16_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX8-SDAG-NEXT: v_frexp_exp_i16_f16_e32 v0, v0 +; GFX8-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16 +; GFX8-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: test_frexp_v2f16_v2i32_only_use_exp: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_frexp_exp_i16_f16_sdwa v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9-SDAG-NEXT: v_frexp_exp_i16_f16_e32 v0, v0 +; GFX9-SDAG-NEXT: v_bfe_i32 v1, v1, 0, 16 +; GFX9-SDAG-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_frexp_v2f16_v2i32_only_use_exp: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 +; GFX11-NEXT: v_frexp_exp_i16_f16_e32 v0, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-NEXT: v_frexp_exp_i16_f16_e32 v1, v1 +; GFX11-NEXT: v_bfe_i32 v0, v0, 0, 16 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_bfe_i32 v1, v1, 0, 16 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_frexp_v2f16_v2i32_only_use_exp: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v1, v1 +; GFX6-GISEL-NEXT: s_mov_b32 s4, 
0x7f800000 +; GFX6-GISEL-NEXT: v_frexp_exp_i32_f32_e32 v2, v0 +; GFX6-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX6-GISEL-NEXT: v_frexp_exp_i32_f32_e32 v2, v1 +; GFX6-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: test_frexp_v2f16_v2i32_only_use_exp: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_frexp_exp_i16_f16_e32 v1, v0 +; GFX8-GISEL-NEXT: v_bfe_i32 v2, v1, 0, 16 +; GFX8-GISEL-NEXT: v_frexp_exp_i16_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX8-GISEL-NEXT: v_bfe_i32 v1, v0, 0, 16 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v2 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: test_frexp_v2f16_v2i32_only_use_exp: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_frexp_exp_i16_f16_e32 v1, v0 +; GFX9-GISEL-NEXT: v_bfe_i32 v2, v1, 0, 16 +; GFX9-GISEL-NEXT: v_frexp_exp_i16_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 +; GFX9-GISEL-NEXT: v_bfe_i32 v1, v0, 0, 16 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a) + %result.1 = extractvalue { <2 x half>, <2 x i32> } %result, 1 + ret <2 x i32> %result.1 +} + +define { half, i16 } @test_frexp_f16_i16(half %a) { +; GFX6-SDAG-LABEL: test_frexp_f16_i16: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-SDAG-NEXT: v_frexp_mant_f32_e32 v1, v0 +; GFX6-SDAG-NEXT: v_frexp_exp_i32_f32_e32 v2, v0 +; GFX6-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; 
GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_frexp_f16_i16: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_mant_f16_e32 v2, v0 +; GFX8-NEXT: v_frexp_exp_i16_f16_e32 v1, v0 +; GFX8-NEXT: v_mov_b32_e32 v0, v2 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_frexp_f16_i16: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_frexp_mant_f16_e32 v2, v0 +; GFX9-NEXT: v_frexp_exp_i16_f16_e32 v1, v0 +; GFX9-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_frexp_f16_i16: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_frexp_mant_f16_e32 v2, v0 +; GFX11-NEXT: v_frexp_exp_i16_f16_e32 v1, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_mov_b32_e32 v0, v2 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_frexp_f16_i16: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; GFX6-GISEL-NEXT: v_frexp_mant_f32_e32 v2, v0 +; GFX6-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; GFX6-GISEL-NEXT: v_frexp_exp_i32_f32_e32 v3, v0 +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v3, vcc +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call { half, i16 } @llvm.frexp.f16.i16(half %a) + ret { half, i16 } %result +} + +define half @test_frexp_f16_i16_only_use_fract(half %a) { +; GFX6-SDAG-LABEL: test_frexp_f16_i16_only_use_fract: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-SDAG-NEXT: 
v_frexp_mant_f32_e32 v1, v0 +; GFX6-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_frexp_f16_i16_only_use_fract: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_mant_f16_e32 v0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_frexp_f16_i16_only_use_fract: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_frexp_mant_f16_e32 v0, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_frexp_f16_i16_only_use_fract: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_frexp_mant_f16_e32 v0, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_frexp_f16_i16_only_use_fract: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; GFX6-GISEL-NEXT: v_frexp_mant_f32_e32 v2, v0 +; GFX6-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call { half, i16 } @llvm.frexp.f16.i16(half %a) + %result.0 = extractvalue { half, i16 } %result, 0 + ret half %result.0 +} + +define i16 @test_frexp_f16_i16_only_use_exp(half %a) { +; GFX6-SDAG-LABEL: test_frexp_f16_i16_only_use_exp: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX6-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-SDAG-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-SDAG-NEXT: v_frexp_exp_i32_f32_e32 v1, v0 +; GFX6-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: 
test_frexp_f16_i16_only_use_exp: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_exp_i16_f16_e32 v0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_frexp_f16_i16_only_use_exp: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_frexp_exp_i16_f16_e32 v0, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_frexp_f16_i16_only_use_exp: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_frexp_exp_i16_f16_e32 v0, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_frexp_f16_i16_only_use_exp: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_cvt_f32_f16_e32 v0, v0 +; GFX6-GISEL-NEXT: v_mov_b32_e32 v1, 0x7f800000 +; GFX6-GISEL-NEXT: v_frexp_exp_i32_f32_e32 v2, v0 +; GFX6-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v1 +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call { half, i16 } @llvm.frexp.f16.i16(half %a) + %result.0 = extractvalue { half, i16 } %result, 1 + ret i16 %result.0 +} + +; define { <2 x half>, <2 x i16> } @test_frexp_v2f16_v2i16(<2 x half> %a) { +; %result = call { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half> %a) +; ret { <2 x half>, <2 x i16> } %result +; } + +; define <2 x half> @test_frexp_v2f16_v2i16_only_use_fract(<2 x half> %a) { +; %result = call { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half> %a) +; %result.0 = extractvalue { <2 x half>, <2 x i16> } %result, 0 +; ret <2 x half> %result.0 +; } + +; define <2 x i16> @test_frexp_v2f16_v2i16_only_use_exp(<2 x half> %a) { +; %result = call { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half> %a) +; %result.1 = extractvalue { <2 x half>, <2 x i16> } %result, 1 +; ret <2 x i16> %result.1 +; } + +define { float, i32 } @test_frexp_f32_i32(float %a) { +; 
GFX6-SDAG-LABEL: test_frexp_f32_i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-SDAG-NEXT: v_frexp_mant_f32_e32 v1, v0 +; GFX6-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v2, v0, v1, vcc +; GFX6-SDAG-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, 0, v0, vcc +; GFX6-SDAG-NEXT: v_mov_b32_e32 v0, v2 +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_frexp_f32_i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_mant_f32_e32 v2, v0 +; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v0 +; GFX8-NEXT: v_mov_b32_e32 v0, v2 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_frexp_f32_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_frexp_mant_f32_e32 v2, v0 +; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v1, v0 +; GFX9-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_frexp_f32_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_frexp_mant_f32_e32 v2, v0 +; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v0 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_mov_b32_e32 v0, v2 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_frexp_f32_i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_mov_b32_e32 v3, 0x7f800000 +; GFX6-GISEL-NEXT: v_frexp_mant_f32_e32 v2, v0 +; GFX6-GISEL-NEXT: v_frexp_exp_i32_f32_e32 v1, v0 +; GFX6-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v3 +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call { float, i32 } @llvm.frexp.f32.i32(float %a) + ret { float, i32 } %result +} + +define float 
@test_frexp_f32_i32_only_use_fract(float %a) { +; GFX6-SDAG-LABEL: test_frexp_f32_i32_only_use_fract: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-SDAG-NEXT: v_frexp_mant_f32_e32 v1, v0 +; GFX6-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_frexp_f32_i32_only_use_fract: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_frexp_f32_i32_only_use_fract: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_frexp_mant_f32_e32 v0, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_frexp_f32_i32_only_use_fract: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_frexp_mant_f32_e32 v0, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_frexp_f32_i32_only_use_fract: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX6-GISEL-NEXT: v_frexp_mant_f32_e32 v1, v0 +; GFX6-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call { float, i32 } @llvm.frexp.f32.i32(float %a) + %result.0 = extractvalue { float, i32 } %result, 0 + ret float %result.0 +} + +define i32 @test_frexp_f32_i32_only_use_exp(float %a) { +; GFX6-SDAG-LABEL: test_frexp_f32_i32_only_use_exp: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-SDAG-NEXT: v_frexp_exp_i32_f32_e32 v1, v0 +; GFX6-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; 
GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_frexp_f32_i32_only_use_exp: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_frexp_f32_i32_only_use_exp: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_frexp_f32_i32_only_use_exp: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_frexp_f32_i32_only_use_exp: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: v_mov_b32_e32 v2, 0x7f800000 +; GFX6-GISEL-NEXT: v_frexp_exp_i32_f32_e32 v1, v0 +; GFX6-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, v2 +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call { float, i32 } @llvm.frexp.f32.i32(float %a) + %result.0 = extractvalue { float, i32 } %result, 1 + ret i32 %result.0 +} + +define { <2 x float>, <2 x i32> } @test_frexp_v2f32_v2i32(<2 x float> %a) { +; GFX6-SDAG-LABEL: test_frexp_v2f32_v2i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-SDAG-NEXT: v_frexp_mant_f32_e32 v2, v0 +; GFX6-SDAG-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, v0, v2, vcc +; GFX6-SDAG-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX6-SDAG-NEXT: v_cmp_lt_f32_e64 s[4:5], |v1|, s4 +; GFX6-SDAG-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v5, v1, v2, s[4:5] +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v0, vcc +; GFX6-SDAG-NEXT: v_frexp_exp_i32_f32_e32 v0, v1 +; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v3, 0, v0, s[4:5] +; GFX6-SDAG-NEXT: 
v_mov_b32_e32 v0, v4 +; GFX6-SDAG-NEXT: v_mov_b32_e32 v1, v5 +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-SDAG-LABEL: test_frexp_v2f32_v2i32: +; GFX8-SDAG: ; %bb.0: +; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-SDAG-NEXT: v_frexp_mant_f32_e32 v4, v0 +; GFX8-SDAG-NEXT: v_frexp_mant_f32_e32 v5, v1 +; GFX8-SDAG-NEXT: v_frexp_exp_i32_f32_e32 v2, v0 +; GFX8-SDAG-NEXT: v_frexp_exp_i32_f32_e32 v3, v1 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, v4 +; GFX8-SDAG-NEXT: v_mov_b32_e32 v1, v5 +; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-SDAG-LABEL: test_frexp_v2f32_v2i32: +; GFX9-SDAG: ; %bb.0: +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: v_frexp_mant_f32_e32 v4, v0 +; GFX9-SDAG-NEXT: v_frexp_mant_f32_e32 v5, v1 +; GFX9-SDAG-NEXT: v_frexp_exp_i32_f32_e32 v2, v0 +; GFX9-SDAG-NEXT: v_frexp_exp_i32_f32_e32 v3, v1 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, v4 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, v5 +; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_frexp_v2f32_v2i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_frexp_mant_f32_e32 v4, v0 +; GFX11-NEXT: v_frexp_mant_f32_e32 v5, v1 +; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v2, v0 +; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v3, v1 +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) +; GFX11-NEXT: v_dual_mov_b32 v0, v4 :: v_dual_mov_b32 v1, v5 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_frexp_v2f32_v2i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-GISEL-NEXT: v_frexp_mant_f32_e32 v3, v0 +; GFX6-GISEL-NEXT: v_frexp_exp_i32_f32_e32 v2, v0 +; GFX6-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX6-GISEL-NEXT: v_frexp_mant_f32_e32 v4, v1 +; GFX6-GISEL-NEXT: v_frexp_exp_i32_f32_e32 v3, 
v1 +; GFX6-GISEL-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-GISEL-LABEL: test_frexp_v2f32_v2i32: +; GFX8-GISEL: ; %bb.0: +; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: v_frexp_mant_f32_e32 v5, v0 +; GFX8-GISEL-NEXT: v_frexp_mant_f32_e32 v4, v1 +; GFX8-GISEL-NEXT: v_frexp_exp_i32_f32_e32 v2, v0 +; GFX8-GISEL-NEXT: v_frexp_exp_i32_f32_e32 v3, v1 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v0, v5 +; GFX8-GISEL-NEXT: v_mov_b32_e32 v1, v4 +; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-GISEL-LABEL: test_frexp_v2f32_v2i32: +; GFX9-GISEL: ; %bb.0: +; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: v_frexp_mant_f32_e32 v5, v0 +; GFX9-GISEL-NEXT: v_frexp_mant_f32_e32 v4, v1 +; GFX9-GISEL-NEXT: v_frexp_exp_i32_f32_e32 v2, v0 +; GFX9-GISEL-NEXT: v_frexp_exp_i32_f32_e32 v3, v1 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, v5 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, v4 +; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> %a) + ret { <2 x float>, <2 x i32> } %result +} + +define <2 x float> @test_frexp_v2f32_v2i32_only_use_fract(<2 x float> %a) { +; GFX6-LABEL: test_frexp_v2f32_v2i32_only_use_fract: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-NEXT: v_frexp_mant_f32_e32 v2, v0 +; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_frexp_mant_f32_e32 v2, v1 +; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_frexp_v2f32_v2i32_only_use_fract: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_mant_f32_e32 v0, v0 +; GFX8-NEXT: v_frexp_mant_f32_e32 v1, v1 
+; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_frexp_v2f32_v2i32_only_use_fract: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_frexp_mant_f32_e32 v0, v0 +; GFX9-NEXT: v_frexp_mant_f32_e32 v1, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_frexp_v2f32_v2i32_only_use_fract: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_frexp_mant_f32_e32 v0, v0 +; GFX11-NEXT: v_frexp_mant_f32_e32 v1, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %result = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> %a) + %result.0 = extractvalue { <2 x float>, <2 x i32> } %result, 0 + ret <2 x float> %result.0 +} + +define <2 x i32> @test_frexp_v2f32_v2i32_only_use_exp(<2 x float> %a) { +; GFX6-LABEL: test_frexp_v2f32_v2i32_only_use_exp: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: s_mov_b32 s4, 0x7f800000 +; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v2, v0 +; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v0|, s4 +; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX6-NEXT: v_frexp_exp_i32_f32_e32 v2, v1 +; GFX6-NEXT: v_cmp_lt_f32_e64 vcc, |v1|, s4 +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v2, vcc +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_frexp_v2f32_v2i32_only_use_exp: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX8-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_frexp_v2f32_v2i32_only_use_exp: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v0, v0 +; GFX9-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_frexp_v2f32_v2i32_only_use_exp: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: 
v_frexp_exp_i32_f32_e32 v0, v0 +; GFX11-NEXT: v_frexp_exp_i32_f32_e32 v1, v1 +; GFX11-NEXT: s_setpc_b64 s[30:31] + %result = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> %a) + %result.1 = extractvalue { <2 x float>, <2 x i32> } %result, 1 + ret <2 x i32> %result.1 +} + +define { double, i32 } @test_frexp_f64_i32(double %a) { +; GFX6-SDAG-LABEL: test_frexp_f64_i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s4, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s5, 0x7ff00000 +; GFX6-SDAG-NEXT: v_frexp_exp_i32_f64_e32 v2, v[0:1] +; GFX6-SDAG-NEXT: v_frexp_mant_f64_e32 v[3:4], v[0:1] +; GFX6-SDAG-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5] +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_frexp_f64_i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_mant_f64_e32 v[3:4], v[0:1] +; GFX8-NEXT: v_frexp_exp_i32_f64_e32 v2, v[0:1] +; GFX8-NEXT: v_mov_b32_e32 v0, v3 +; GFX8-NEXT: v_mov_b32_e32 v1, v4 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_frexp_f64_i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_frexp_mant_f64_e32 v[3:4], v[0:1] +; GFX9-NEXT: v_frexp_exp_i32_f64_e32 v2, v[0:1] +; GFX9-NEXT: v_mov_b32_e32 v0, v3 +; GFX9-NEXT: v_mov_b32_e32 v1, v4 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_frexp_f64_i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_frexp_mant_f64_e32 v[3:4], v[0:1] +; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v2, v[0:1] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) +; GFX11-NEXT: v_dual_mov_b32 v0, v3 :: v_dual_mov_b32 v1, v4 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_frexp_f64_i32: +; 
GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s5, 0x7ff00000 +; GFX6-GISEL-NEXT: v_frexp_mant_f64_e32 v[3:4], v[0:1] +; GFX6-GISEL-NEXT: v_frexp_exp_i32_f64_e32 v2, v[0:1] +; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5] +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call { double, i32 } @llvm.frexp.f64.i32(double %a) + ret { double, i32 } %result +} + +define double @test_frexp_f64_i32_only_use_fract(double %a) { +; GFX6-LABEL: test_frexp_f64_i32_only_use_fract: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: s_mov_b32 s4, 0 +; GFX6-NEXT: s_mov_b32 s5, 0x7ff00000 +; GFX6-NEXT: v_frexp_mant_f64_e32 v[2:3], v[0:1] +; GFX6-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5] +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_frexp_f64_i32_only_use_fract: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_mant_f64_e32 v[0:1], v[0:1] +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_frexp_f64_i32_only_use_fract: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_frexp_mant_f64_e32 v[0:1], v[0:1] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_frexp_f64_i32_only_use_fract: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_frexp_mant_f64_e32 v[0:1], v[0:1] +; GFX11-NEXT: s_setpc_b64 s[30:31] + %result = call { double, i32 } @llvm.frexp.f64.i32(double %a) + %result.0 = extractvalue { double, i32 } %result, 0 + ret double %result.0 +} + +define i32 @test_frexp_f64_i32_only_use_exp(double %a) 
{ +; GFX6-LABEL: test_frexp_f64_i32_only_use_exp: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: s_mov_b32 s4, 0 +; GFX6-NEXT: s_mov_b32 s5, 0x7ff00000 +; GFX6-NEXT: v_frexp_exp_i32_f64_e32 v2, v[0:1] +; GFX6-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5] +; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v2, vcc +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_frexp_f64_i32_only_use_exp: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_exp_i32_f64_e32 v0, v[0:1] +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_frexp_f64_i32_only_use_exp: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_frexp_exp_i32_f64_e32 v0, v[0:1] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_frexp_f64_i32_only_use_exp: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v0, v[0:1] +; GFX11-NEXT: s_setpc_b64 s[30:31] + %result = call { double, i32 } @llvm.frexp.f64.i32(double %a) + %result.0 = extractvalue { double, i32 } %result, 1 + ret i32 %result.0 +} + +define { <2 x double>, <2 x i32> } @test_frexp_v2f64_v2i32(<2 x double> %a) { +; GFX6-SDAG-LABEL: test_frexp_v2f64_v2i32: +; GFX6-SDAG: ; %bb.0: +; GFX6-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-SDAG-NEXT: s_mov_b32 s4, 0 +; GFX6-SDAG-NEXT: s_mov_b32 s5, 0x7ff00000 +; GFX6-SDAG-NEXT: v_frexp_exp_i32_f64_e32 v4, v[0:1] +; GFX6-SDAG-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5] +; GFX6-SDAG-NEXT: v_frexp_exp_i32_f64_e32 v5, v[2:3] +; GFX6-SDAG-NEXT: v_frexp_mant_f64_e32 v[6:7], v[0:1] +; GFX6-SDAG-NEXT: v_cmp_lt_f64_e64 s[4:5], |v[2:3]|, s[4:5] +; GFX6-SDAG-NEXT: v_frexp_mant_f64_e32 v[8:9], v[2:3] +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc +; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, v5, s[4:5] +; GFX6-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc +; GFX6-SDAG-NEXT: 
v_cndmask_b32_e32 v1, v1, v7, vcc +; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v2, v2, v8, s[4:5] +; GFX6-SDAG-NEXT: v_cndmask_b32_e64 v3, v3, v9, s[4:5] +; GFX6-SDAG-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_frexp_v2f64_v2i32: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_mant_f64_e32 v[8:9], v[0:1] +; GFX8-NEXT: v_frexp_mant_f64_e32 v[6:7], v[2:3] +; GFX8-NEXT: v_frexp_exp_i32_f64_e32 v4, v[0:1] +; GFX8-NEXT: v_frexp_exp_i32_f64_e32 v5, v[2:3] +; GFX8-NEXT: v_mov_b32_e32 v0, v8 +; GFX8-NEXT: v_mov_b32_e32 v1, v9 +; GFX8-NEXT: v_mov_b32_e32 v2, v6 +; GFX8-NEXT: v_mov_b32_e32 v3, v7 +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_frexp_v2f64_v2i32: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_frexp_mant_f64_e32 v[8:9], v[0:1] +; GFX9-NEXT: v_frexp_mant_f64_e32 v[6:7], v[2:3] +; GFX9-NEXT: v_frexp_exp_i32_f64_e32 v4, v[0:1] +; GFX9-NEXT: v_frexp_exp_i32_f64_e32 v5, v[2:3] +; GFX9-NEXT: v_mov_b32_e32 v0, v8 +; GFX9-NEXT: v_mov_b32_e32 v1, v9 +; GFX9-NEXT: v_mov_b32_e32 v2, v6 +; GFX9-NEXT: v_mov_b32_e32 v3, v7 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_frexp_v2f64_v2i32: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_frexp_mant_f64_e32 v[8:9], v[0:1] +; GFX11-NEXT: v_frexp_mant_f64_e32 v[6:7], v[2:3] +; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v4, v[0:1] +; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v5, v[2:3] +; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) +; GFX11-NEXT: v_dual_mov_b32 v0, v8 :: v_dual_mov_b32 v1, v9 +; GFX11-NEXT: v_dual_mov_b32 v2, v6 :: v_dual_mov_b32 v3, v7 +; GFX11-NEXT: s_setpc_b64 s[30:31] +; +; GFX6-GISEL-LABEL: test_frexp_v2f64_v2i32: +; GFX6-GISEL: ; %bb.0: +; GFX6-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-GISEL-NEXT: s_mov_b32 s4, 0 +; GFX6-GISEL-NEXT: s_mov_b32 s5, 0x7ff00000 +; GFX6-GISEL-NEXT: 
v_frexp_mant_f64_e32 v[5:6], v[0:1] +; GFX6-GISEL-NEXT: v_frexp_exp_i32_f64_e32 v4, v[0:1] +; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5] +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v1, v1, v6, vcc +; GFX6-GISEL-NEXT: v_frexp_mant_f64_e32 v[6:7], v[2:3] +; GFX6-GISEL-NEXT: v_frexp_exp_i32_f64_e32 v5, v[2:3] +; GFX6-GISEL-NEXT: v_cmp_lt_f64_e64 vcc, |v[2:3]|, s[4:5] +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v2, v2, v6, vcc +; GFX6-GISEL-NEXT: v_cndmask_b32_e32 v3, v3, v7, vcc +; GFX6-GISEL-NEXT: s_setpc_b64 s[30:31] + %result = call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> %a) + ret { <2 x double>, <2 x i32> } %result +} + +define <2 x double> @test_frexp_v2f64_v2i32_only_use_fract(<2 x double> %a) { +; GFX6-LABEL: test_frexp_v2f64_v2i32_only_use_fract: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: s_mov_b32 s4, 0 +; GFX6-NEXT: s_mov_b32 s5, 0x7ff00000 +; GFX6-NEXT: v_frexp_mant_f64_e32 v[4:5], v[0:1] +; GFX6-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5] +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc +; GFX6-NEXT: v_frexp_mant_f64_e32 v[4:5], v[2:3] +; GFX6-NEXT: v_cmp_lt_f64_e64 vcc, |v[2:3]|, s[4:5] +; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc +; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_frexp_v2f64_v2i32_only_use_fract: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_mant_f64_e32 v[0:1], v[0:1] +; GFX8-NEXT: v_frexp_mant_f64_e32 v[2:3], v[2:3] +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_frexp_v2f64_v2i32_only_use_fract: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_frexp_mant_f64_e32 v[0:1], v[0:1] +; GFX9-NEXT: 
v_frexp_mant_f64_e32 v[2:3], v[2:3] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_frexp_v2f64_v2i32_only_use_fract: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_frexp_mant_f64_e32 v[0:1], v[0:1] +; GFX11-NEXT: v_frexp_mant_f64_e32 v[2:3], v[2:3] +; GFX11-NEXT: s_setpc_b64 s[30:31] + %result = call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> %a) + %result.0 = extractvalue { <2 x double>, <2 x i32> } %result, 0 + ret <2 x double> %result.0 +} + +define <2 x i32> @test_frexp_v2f64_v2i32_only_use_exp(<2 x double> %a) { +; GFX6-LABEL: test_frexp_v2f64_v2i32_only_use_exp: +; GFX6: ; %bb.0: +; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX6-NEXT: s_mov_b32 s4, 0 +; GFX6-NEXT: s_mov_b32 s5, 0x7ff00000 +; GFX6-NEXT: v_frexp_exp_i32_f64_e32 v4, v[0:1] +; GFX6-NEXT: v_cmp_lt_f64_e64 vcc, |v[0:1]|, s[4:5] +; GFX6-NEXT: v_frexp_exp_i32_f64_e32 v1, v[2:3] +; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc +; GFX6-NEXT: v_cmp_lt_f64_e64 vcc, |v[2:3]|, s[4:5] +; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX6-NEXT: s_setpc_b64 s[30:31] +; +; GFX8-LABEL: test_frexp_v2f64_v2i32_only_use_exp: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX8-NEXT: v_frexp_exp_i32_f64_e32 v0, v[0:1] +; GFX8-NEXT: v_frexp_exp_i32_f64_e32 v1, v[2:3] +; GFX8-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-LABEL: test_frexp_v2f64_v2i32_only_use_exp: +; GFX9: ; %bb.0: +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_frexp_exp_i32_f64_e32 v0, v[0:1] +; GFX9-NEXT: v_frexp_exp_i32_f64_e32 v1, v[2:3] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX11-LABEL: test_frexp_v2f64_v2i32_only_use_exp: +; GFX11: ; %bb.0: +; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 +; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v0, v[0:1] +; GFX11-NEXT: v_frexp_exp_i32_f64_e32 v1, v[2:3] +; GFX11-NEXT: s_setpc_b64 s[30:31] + 
%result = call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> %a) + %result.1 = extractvalue { <2 x double>, <2 x i32> } %result, 1 + ret <2 x i32> %result.1 +} + +declare { float, i32 } @llvm.frexp.f32.i32(float) #0 +declare { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float>) #0 + +declare { half, i32 } @llvm.frexp.f16.i32(half) #0 +declare { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half>) #0 + +declare { double, i32 } @llvm.frexp.f64.i32(double) #0 +declare { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double>) #0 + +declare { half, i16 } @llvm.frexp.f16.i16(half) #0 +declare { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half>) #0 + +attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; GCN: {{.*}} +; GFX11-GISEL: {{.*}} +; GFX11-SDAG: {{.*}} Index: llvm/test/CodeGen/PowerPC/llvm.frexp.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/llvm.frexp.ll @@ -0,0 +1,415 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s | FileCheck %s + +define { half, i32 } @test_frexp_f16_i32(half %a) { +; CHECK-LABEL: test_frexp_f16_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: std r0, 64(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: addi r4, r1, 44 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: nop +; CHECK-NEXT: lwz r3, 44(r1) +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; 
CHECK-NEXT: blr + %result = call { half, i32 } @llvm.frexp.f16.i32(half %a) + ret { half, i32 } %result +} + +define half @test_frexp_f16_i32_only_use_fract(half %a) { +; CHECK-LABEL: test_frexp_f16_i32_only_use_fract: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: std r0, 64(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: addi r4, r1, 44 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call { half, i32 } @llvm.frexp.f16.i32(half %a) + %result.0 = extractvalue { half, i32 } %result, 0 + ret half %result.0 +} + +define i32 @test_frexp_f16_i32_only_use_exp(half %a) { +; CHECK-LABEL: test_frexp_f16_i32_only_use_exp: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: std r0, 64(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: addi r4, r1, 44 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: nop +; CHECK-NEXT: lwz r3, 44(r1) +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call { half, i32 } @llvm.frexp.f16.i32(half %a) + %result.0 = extractvalue { half, i32 } %result, 1 + ret i32 %result.0 +} + +define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) { +; CHECK-LABEL: test_frexp_v2f16_v2i32: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r29, -40 +; CHECK-NEXT: .cfi_offset r30, -32 +; CHECK-NEXT: .cfi_offset f30, -16 +; CHECK-NEXT: 
.cfi_offset f31, -8 +; CHECK-NEXT: std r29, -40(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, -32(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill +; CHECK-NEXT: stdu r1, -80(r1) +; CHECK-NEXT: std r0, 96(r1) +; CHECK-NEXT: xscvdphp f0, f2 +; CHECK-NEXT: addi r30, r1, 32 +; CHECK-NEXT: mr r4, r30 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f31, f0 +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: nop +; CHECK-NEXT: addi r29, r1, 36 +; CHECK-NEXT: fmr f30, f1 +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: mr r4, r29 +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f2, f1 +; CHECK-NEXT: lfiwzx f0, 0, r30 +; CHECK-NEXT: lfiwzx f1, 0, r29 +; CHECK-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-NEXT: fmr f1, f30 +; CHECK-NEXT: addi r1, r1, 80 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, -32(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, -40(r1) # 8-byte Folded Reload +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a) + ret { <2 x half>, <2 x i32> } %result +} + +define <2 x half> @test_frexp_v2f16_v2i32_only_use_fract(<2 x half> %a) { +; CHECK-LABEL: test_frexp_v2f16_v2i32_only_use_fract: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset f30, -16 +; CHECK-NEXT: .cfi_offset f31, -8 +; CHECK-NEXT: stfd f30, -16(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill +; CHECK-NEXT: stdu r1, -64(r1) +; CHECK-NEXT: std r0, 80(r1) +; CHECK-NEXT: xscvdphp f0, f2 +; 
CHECK-NEXT: addi r4, r1, 40 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f31, f0 +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: nop +; CHECK-NEXT: addi r4, r1, 44 +; CHECK-NEXT: fmr f30, f1 +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: nop +; CHECK-NEXT: fmr f2, f1 +; CHECK-NEXT: fmr f1, f30 +; CHECK-NEXT: addi r1, r1, 64 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload +; CHECK-NEXT: lfd f30, -16(r1) # 8-byte Folded Reload +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a) + %result.0 = extractvalue { <2 x half>, <2 x i32> } %result, 0 + ret <2 x half> %result.0 +} + +define <2 x i32> @test_frexp_v2f16_v2i32_only_use_exp(<2 x half> %a) { +; CHECK-LABEL: test_frexp_v2f16_v2i32_only_use_exp: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: .cfi_def_cfa_offset 80 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: .cfi_offset r29, -32 +; CHECK-NEXT: .cfi_offset r30, -24 +; CHECK-NEXT: .cfi_offset f31, -8 +; CHECK-NEXT: std r29, -32(r1) # 8-byte Folded Spill +; CHECK-NEXT: std r30, -24(r1) # 8-byte Folded Spill +; CHECK-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill +; CHECK-NEXT: stdu r1, -80(r1) +; CHECK-NEXT: std r0, 96(r1) +; CHECK-NEXT: xscvdphp f0, f2 +; CHECK-NEXT: addi r30, r1, 40 +; CHECK-NEXT: mr r4, r30 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f31, f0 +; CHECK-NEXT: xscvdphp f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrlwi r3, r3, 16 +; CHECK-NEXT: mtfprwz f0, r3 +; CHECK-NEXT: xscvhpdp f1, f0 +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: nop +; CHECK-NEXT: addi r29, r1, 44 +; CHECK-NEXT: fmr f1, f31 +; CHECK-NEXT: mr r4, r29 +; 
CHECK-NEXT: bl frexpf +; CHECK-NEXT: nop +; CHECK-NEXT: lfiwzx f0, 0, r30 +; CHECK-NEXT: lfiwzx f1, 0, r29 +; CHECK-NEXT: xxmrghw v2, vs1, vs0 +; CHECK-NEXT: addi r1, r1, 80 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: lfd f31, -8(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r30, -24(r1) # 8-byte Folded Reload +; CHECK-NEXT: ld r29, -32(r1) # 8-byte Folded Reload +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a) + %result.1 = extractvalue { <2 x half>, <2 x i32> } %result, 1 + ret <2 x i32> %result.1 +} + +define { float, i32 } @test_frexp_f32_i32(float %a) { +; CHECK-LABEL: test_frexp_f32_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: std r0, 64(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: addi r4, r1, 44 +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: nop +; CHECK-NEXT: lwz r3, 44(r1) +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call { float, i32 } @llvm.frexp.f32.i32(float %a) + ret { float, i32 } %result +} + +define float @test_frexp_f32_i32_only_use_fract(float %a) { +; CHECK-LABEL: test_frexp_f32_i32_only_use_fract: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: std r0, 64(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: addi r4, r1, 44 +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call { float, i32 } @llvm.frexp.f32.i32(float %a) + %result.0 = extractvalue { float, i32 } %result, 0 + ret float %result.0 +} + +define i32 @test_frexp_f32_i32_only_use_exp(float %a) { +; CHECK-LABEL: test_frexp_f32_i32_only_use_exp: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: std r0, 64(r1) +; CHECK-NEXT: 
.cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: addi r4, r1, 44 +; CHECK-NEXT: bl frexpf +; CHECK-NEXT: nop +; CHECK-NEXT: lwz r3, 44(r1) +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call { float, i32 } @llvm.frexp.f32.i32(float %a) + %result.0 = extractvalue { float, i32 } %result, 1 + ret i32 %result.0 +} + +; FIXME +; define { <2 x float>, <2 x i32> } @test_frexp_v2f32_v2i32(<2 x float> %a) { +; %result = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> %a) +; ret { <2 x float>, <2 x i32> } %result +; } + +; define <2 x float> @test_frexp_v2f32_v2i32_only_use_fract(<2 x float> %a) { +; %result = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> %a) +; %result.0 = extractvalue { <2 x float>, <2 x i32> } %result, 0 +; ret <2 x float> %result.0 +; } + +; define <2 x i32> @test_frexp_v2f32_v2i32_only_use_exp(<2 x float> %a) { +; %result = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> %a) +; %result.1 = extractvalue { <2 x float>, <2 x i32> } %result, 1 +; ret <2 x i32> %result.1 +; } + +define { double, i32 } @test_frexp_f64_i32(double %a) { +; CHECK-LABEL: test_frexp_f64_i32: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: std r0, 64(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: addi r4, r1, 44 +; CHECK-NEXT: bl frexp +; CHECK-NEXT: nop +; CHECK-NEXT: lwz r3, 44(r1) +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call { double, i32 } @llvm.frexp.f64.i32(double %a) + ret { double, i32 } %result +} + +define double @test_frexp_f64_i32_only_use_fract(double %a) { +; CHECK-LABEL: test_frexp_f64_i32_only_use_fract: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: std r0, 64(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: 
.cfi_offset lr, 16 +; CHECK-NEXT: addi r4, r1, 44 +; CHECK-NEXT: bl frexp +; CHECK-NEXT: nop +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call { double, i32 } @llvm.frexp.f64.i32(double %a) + %result.0 = extractvalue { double, i32 } %result, 0 + ret double %result.0 +} + +define i32 @test_frexp_f64_i32_only_use_exp(double %a) { +; CHECK-LABEL: test_frexp_f64_i32_only_use_exp: +; CHECK: # %bb.0: +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: stdu r1, -48(r1) +; CHECK-NEXT: std r0, 64(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: addi r4, r1, 44 +; CHECK-NEXT: bl frexp +; CHECK-NEXT: nop +; CHECK-NEXT: lwz r3, 44(r1) +; CHECK-NEXT: addi r1, r1, 48 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr + %result = call { double, i32 } @llvm.frexp.f64.i32(double %a) + %result.0 = extractvalue { double, i32 } %result, 1 + ret i32 %result.0 +} + +; FIXME: Widen vector result +; define { <2 x double>, <2 x i32> } @test_frexp_v2f64_v2i32(<2 x double> %a) { +; %result = call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> %a) +; ret { <2 x double>, <2 x i32> } %result +; } + +; define <2 x double> @test_frexp_v2f64_v2i32_only_use_fract(<2 x double> %a) { +; %result = call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> %a) +; %result.0 = extractvalue { <2 x double>, <2 x i32> } %result, 0 +; ret <2 x double> %result.0 +; } + +; define <2 x i32> @test_frexp_v2f64_v2i32_only_use_exp(<2 x double> %a) { +; %result = call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> %a) +; %result.1 = extractvalue { <2 x double>, <2 x i32> } %result, 1 +; ret <2 x i32> %result.1 +; } + +; FIXME: f128 ExpandFloatResult +; define { ppc_fp128, i32 } @test_frexp_f128_i32(ppc_fp128 %a) { +; %result = call { ppc_fp128, i32 } @llvm.frexp.f128.i32(ppc_fp128 %a) +; ret { ppc_fp128, i32 } %result +; } + +; define ppc_fp128 
@test_frexp_f128_i32_only_use_fract(ppc_fp128 %a) {
+;   %result = call { ppc_fp128, i32 } @llvm.frexp.ppcf128.i32(ppc_fp128 %a)
+;   %result.0 = extractvalue { ppc_fp128, i32 } %result, 0
+;   ret ppc_fp128 %result.0
+; }
+
+; define i32 @test_frexp_f128_i32_only_use_exp(ppc_fp128 %a) {
+;   %result = call { ppc_fp128, i32 } @llvm.frexp.ppcf128.i32(ppc_fp128 %a)
+;   %result.0 = extractvalue { ppc_fp128, i32 } %result, 1
+;   ret i32 %result.0
+; }
+
+declare { float, i32 } @llvm.frexp.f32.i32(float) #0
+declare { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float>) #0
+
+declare { half, i32 } @llvm.frexp.f16.i32(half) #0
+declare { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half>) #0
+
+declare { double, i32 } @llvm.frexp.f64.i32(double) #0
+declare { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double>) #0
+
+declare { half, i16 } @llvm.frexp.f16.i16(half) #0
+declare { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half>) #0
+
+declare { ppc_fp128, i32 } @llvm.frexp.ppcf128.i32(ppc_fp128) #0
+
+attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
Index: llvm/test/CodeGen/X86/llvm.frexp.f80.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/llvm.frexp.f80.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; FIXME: Expansion for f80. The i386-pc-win32 line below is disabled (XUN), presumably until that expansion exists.
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck -check-prefixes=X64 %s
+; XUN: llc -mtriple=i386-pc-win32 < %s | FileCheck -check-prefix=WIN32 %s
+
+define { x86_fp80, i32 } @test_frexp_f80_i32(x86_fp80 %a) {
+; X64-LABEL: test_frexp_f80_i32:
+; X64:       # %bb.0:
+; X64-NEXT:    subq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
+; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    callq frexpl@PLT
+; X64-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    addq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %result = call { x86_fp80, i32 } @llvm.frexp.f80.i32(x86_fp80 %a)
+  ret { x86_fp80, i32 } %result
+}
+
+define x86_fp80 @test_frexp_f80_i32_only_use_fract(x86_fp80 %a) {
+; X64-LABEL: test_frexp_f80_i32_only_use_fract:
+; X64:       # %bb.0:
+; X64-NEXT:    subq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
+; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    callq frexpl@PLT
+; X64-NEXT:    addq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %result = call { x86_fp80, i32 } @llvm.frexp.f80.i32(x86_fp80 %a)
+  %result.0 = extractvalue { x86_fp80, i32 } %result, 0 ; field 0: fractional part
+  ret x86_fp80 %result.0
+}
+
+define i32 @test_frexp_f80_i32_only_use_exp(x86_fp80 %a) {
+; X64-LABEL: test_frexp_f80_i32_only_use_exp:
+; X64:       # %bb.0:
+; X64-NEXT:    subq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 32
+; X64-NEXT:    fldt {{[0-9]+}}(%rsp)
+; X64-NEXT:    fstpt (%rsp)
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    callq frexpl@PLT
+; X64-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    addq $24, %rsp
+; X64-NEXT:    .cfi_def_cfa_offset 8
+; X64-NEXT:    retq
+  %result = call { x86_fp80, i32 } @llvm.frexp.f80.i32(x86_fp80 %a)
+  %result.0 = extractvalue { x86_fp80, i32 } %result, 1 ; field 1: integer exponent
+  ret i32 %result.0
+}
+
+declare { x86_fp80, i32 } @llvm.frexp.f80.i32(x86_fp80) #0
+
+attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
Index: llvm/test/CodeGen/X86/llvm.frexp.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/llvm.frexp.ll
@@ -0,0 +1,917 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck -check-prefixes=X64 %s
+; RUN: llc -mtriple=i386-pc-win32 < %s | FileCheck -check-prefix=WIN32 %s
+
+; FIXME: Enable the f16 tests below (currently commented out).
+; define { half, i32 } @test_frexp_f16_i32(half %a) {
+;   %result = call { half, i32 }
@llvm.frexp.f16.i32(half %a) +; ret { half, i32 } %result +; } + +; define half @test_frexp_f16_i32_only_use_fract(half %a) { +; %result = call { half, i32 } @llvm.frexp.f16.i32(half %a) +; %result.0 = extractvalue { half, i32 } %result, 0 +; ret half %result.0 +; } + +; define i32 @test_frexp_f16_i32_only_use_exp(half %a) { +; %result = call { half, i32 } @llvm.frexp.f16.i32(half %a) +; %result.0 = extractvalue { half, i32 } %result, 1 +; ret i32 %result.0 +; } + +; define { <2 x half>, <2 x i32> } @test_frexp_v2f16_v2i32(<2 x half> %a) { +; %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a) +; ret { <2 x half>, <2 x i32> } %result +; } + +; define <2 x half> @test_frexp_v2f16_v2i32_only_use_fract(<2 x half> %a) { +; %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a) +; %result.0 = extractvalue { <2 x half>, <2 x i32> } %result, 0 +; ret <2 x half> %result.0 +; } + +; define <2 x i32> @test_frexp_v2f16_v2i32_only_use_exp(<2 x half> %a) { +; %result = call { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half> %a) +; %result.1 = extractvalue { <2 x half>, <2 x i32> } %result, 1 +; ret <2 x i32> %result.1 +; } + +define { float, i32 } @test_frexp_f32_i32(float %a) { +; X64-LABEL: test_frexp_f32_i32: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: callq frexpf@PLT +; X64-NEXT: movl {{[0-9]+}}(%rsp), %eax +; X64-NEXT: popq %rcx +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; +; WIN32-LABEL: test_frexp_f32_i32: +; WIN32: # %bb.0: +; WIN32-NEXT: subl $12, %esp +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: fld %st(0) +; WIN32-NEXT: fmuls __real@4c000000 +; WIN32-NEXT: fstps {{[0-9]+}}(%esp) +; WIN32-NEXT: fsts (%esp) +; WIN32-NEXT: movl (%esp), %eax +; WIN32-NEXT: movl %eax, %ecx +; WIN32-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF +; WIN32-NEXT: cmpl $8388608, %ecx # imm = 0x800000 +; WIN32-NEXT: jb LBB0_1 
+; WIN32-NEXT: # %bb.2: +; WIN32-NEXT: movl %ecx, %edx +; WIN32-NEXT: jmp LBB0_3 +; WIN32-NEXT: LBB0_1: +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movl %eax, %edx +; WIN32-NEXT: andl $2139095040, %edx # imm = 0x7F800000 +; WIN32-NEXT: LBB0_3: +; WIN32-NEXT: shrl $23, %edx +; WIN32-NEXT: cmpl $8388608, %ecx # imm = 0x800000 +; WIN32-NEXT: jae LBB0_5 +; WIN32-NEXT: # %bb.4: +; WIN32-NEXT: addl $-25, %edx +; WIN32-NEXT: LBB0_5: +; WIN32-NEXT: andl $-2139095041, %eax # imm = 0x807FFFFF +; WIN32-NEXT: orl $1056964608, %eax # imm = 0x3F000000 +; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN32-NEXT: addl $-126, %edx +; WIN32-NEXT: addl $-2139095040, %ecx # imm = 0x80800000 +; WIN32-NEXT: xorl %eax, %eax +; WIN32-NEXT: cmpl $-2139095040, %ecx # imm = 0x80800000 +; WIN32-NEXT: jbe LBB0_7 +; WIN32-NEXT: # %bb.6: +; WIN32-NEXT: movl %edx, %eax +; WIN32-NEXT: LBB0_7: +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: jbe LBB0_9 +; WIN32-NEXT: # %bb.8: +; WIN32-NEXT: fstp %st(1) +; WIN32-NEXT: fldz +; WIN32-NEXT: LBB0_9: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: addl $12, %esp +; WIN32-NEXT: retl + %result = call { float, i32 } @llvm.frexp.f32.i32(float %a) + ret { float, i32 } %result +} + +define float @test_frexp_f32_i32_only_use_fract(float %a) { +; X64-LABEL: test_frexp_f32_i32_only_use_fract: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: callq frexpf@PLT +; X64-NEXT: popq %rax +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; +; WIN32-LABEL: test_frexp_f32_i32_only_use_fract: +; WIN32: # %bb.0: +; WIN32-NEXT: subl $12, %esp +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: fsts (%esp) +; WIN32-NEXT: fld %st(0) +; WIN32-NEXT: fmuls __real@4c000000 +; WIN32-NEXT: fstps {{[0-9]+}}(%esp) +; WIN32-NEXT: movl (%esp), %ecx +; WIN32-NEXT: movl %ecx, %eax +; WIN32-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; WIN32-NEXT: cmpl $8388608, %eax # imm = 0x800000 +; 
WIN32-NEXT: jae LBB1_2 +; WIN32-NEXT: # %bb.1: +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; WIN32-NEXT: LBB1_2: +; WIN32-NEXT: andl $-2139095041, %ecx # imm = 0x807FFFFF +; WIN32-NEXT: orl $1056964608, %ecx # imm = 0x3F000000 +; WIN32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN32-NEXT: addl $-2139095040, %eax # imm = 0x80800000 +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: cmpl $-2139095040, %eax # imm = 0x80800000 +; WIN32-NEXT: jbe LBB1_4 +; WIN32-NEXT: # %bb.3: +; WIN32-NEXT: fstp %st(1) +; WIN32-NEXT: fldz +; WIN32-NEXT: LBB1_4: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: addl $12, %esp +; WIN32-NEXT: retl + %result = call { float, i32 } @llvm.frexp.f32.i32(float %a) + %result.0 = extractvalue { float, i32 } %result, 0 + ret float %result.0 +} + +define i32 @test_frexp_f32_i32_only_use_exp(float %a) { +; X64-LABEL: test_frexp_f32_i32_only_use_exp: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: callq frexpf@PLT +; X64-NEXT: movl {{[0-9]+}}(%rsp), %eax +; X64-NEXT: popq %rcx +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; +; WIN32-LABEL: test_frexp_f32_i32_only_use_exp: +; WIN32: # %bb.0: +; WIN32-NEXT: subl $8, %esp +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: fld %st(0) +; WIN32-NEXT: fmuls __real@4c000000 +; WIN32-NEXT: fstps {{[0-9]+}}(%esp) +; WIN32-NEXT: fstps (%esp) +; WIN32-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF +; WIN32-NEXT: andl (%esp), %ecx +; WIN32-NEXT: cmpl $8388608, %ecx # imm = 0x800000 +; WIN32-NEXT: jb LBB2_1 +; WIN32-NEXT: # %bb.2: +; WIN32-NEXT: movl %ecx, %edx +; WIN32-NEXT: jmp LBB2_3 +; WIN32-NEXT: LBB2_1: +; WIN32-NEXT: movl $2139095040, %edx # imm = 0x7F800000 +; WIN32-NEXT: andl {{[0-9]+}}(%esp), %edx +; WIN32-NEXT: LBB2_3: +; WIN32-NEXT: shrl $23, %edx +; WIN32-NEXT: cmpl $8388608, %ecx # imm = 0x800000 +; WIN32-NEXT: jae LBB2_5 +; WIN32-NEXT: # %bb.4: +; WIN32-NEXT: addl $-25, %edx +; WIN32-NEXT: LBB2_5: +; WIN32-NEXT: addl 
$-2139095040, %ecx # imm = 0x80800000 +; WIN32-NEXT: xorl %eax, %eax +; WIN32-NEXT: cmpl $-2139095040, %ecx # imm = 0x80800000 +; WIN32-NEXT: jbe LBB2_7 +; WIN32-NEXT: # %bb.6: +; WIN32-NEXT: addl $-126, %edx +; WIN32-NEXT: movl %edx, %eax +; WIN32-NEXT: LBB2_7: +; WIN32-NEXT: addl $8, %esp +; WIN32-NEXT: retl + %result = call { float, i32 } @llvm.frexp.f32.i32(float %a) + %result.0 = extractvalue { float, i32 } %result, 1 + ret i32 %result.0 +} + +; FIXME: Widen vector result +; define { <2 x float>, <2 x i32> } @test_frexp_v2f32_v2i32(<2 x float> %a) { +; %result = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> %a) +; ret { <2 x float>, <2 x i32> } %result +; } + +; define <2 x float> @test_frexp_v2f32_v2i32_only_use_fract(<2 x float> %a) { +; %result = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> %a) +; %result.0 = extractvalue { <2 x float>, <2 x i32> } %result, 0 +; ret <2 x float> %result.0 +; } + +; define <2 x i32> @test_frexp_v2f32_v2i32_only_use_exp(<2 x float> %a) { +; %result = call { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float> %a) +; %result.1 = extractvalue { <2 x float>, <2 x i32> } %result, 1 +; ret <2 x i32> %result.1 +; } + +define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) { +; X64-LABEL: test_frexp_v4f32_v4i32: +; X64: # %bb.0: +; X64-NEXT: subq $72, %rsp +; X64-NEXT: .cfi_def_cfa_offset 80 +; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X64-NEXT: movq %rsp, %rdi +; X64-NEXT: callq frexpf@PLT +; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: callq frexpf@PLT +; X64-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload +; X64-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; X64-NEXT: movaps 
%xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: callq frexpf@PLT +; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: callq frexpf@PLT +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X64-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload +; X64-NEXT: # xmm1 = xmm1[0],mem[0] +; X64-NEXT: movaps %xmm1, %xmm0 +; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero +; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] +; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0] +; X64-NEXT: addq $72, %rsp +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; +; WIN32-LABEL: test_frexp_v4f32_v4i32: +; WIN32: # %bb.0: +; WIN32-NEXT: pushl %ebp +; WIN32-NEXT: pushl %ebx +; WIN32-NEXT: pushl %edi +; WIN32-NEXT: pushl %esi +; WIN32-NEXT: subl $68, %esp +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: flds __real@4c000000 +; WIN32-NEXT: fld %st(1) +; WIN32-NEXT: fmul %st(1), %st +; WIN32-NEXT: fstps {{[0-9]+}}(%esp) +; WIN32-NEXT: fxch %st(1) +; WIN32-NEXT: fsts {{[0-9]+}}(%esp) +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; WIN32-NEXT: movl %ebx, %eax +; WIN32-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; WIN32-NEXT: cmpl $8388608, %eax # imm = 0x800000 +; WIN32-NEXT: jb LBB3_1 +; WIN32-NEXT: # %bb.2: +; WIN32-NEXT: movl %eax, %ecx +; WIN32-NEXT: jmp LBB3_3 +; WIN32-NEXT: LBB3_1: +; WIN32-NEXT: movl 
{{[0-9]+}}(%esp), %ebx +; WIN32-NEXT: movl %ebx, %ecx +; WIN32-NEXT: andl $2139095040, %ecx # imm = 0x7F800000 +; WIN32-NEXT: LBB3_3: +; WIN32-NEXT: shrl $23, %ecx +; WIN32-NEXT: cmpl $8388608, %eax # imm = 0x800000 +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: fld %st(0) +; WIN32-NEXT: fmul %st(3), %st +; WIN32-NEXT: fstps {{[0-9]+}}(%esp) +; WIN32-NEXT: fsts {{[0-9]+}}(%esp) +; WIN32-NEXT: jae LBB3_5 +; WIN32-NEXT: # %bb.4: +; WIN32-NEXT: addl $-25, %ecx +; WIN32-NEXT: LBB3_5: +; WIN32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; WIN32-NEXT: movl %ecx, %edx +; WIN32-NEXT: andl $2147483647, %edx # imm = 0x7FFFFFFF +; WIN32-NEXT: cmpl $8388608, %edx # imm = 0x800000 +; WIN32-NEXT: jb LBB3_6 +; WIN32-NEXT: # %bb.7: +; WIN32-NEXT: movl %edx, %esi +; WIN32-NEXT: jmp LBB3_8 +; WIN32-NEXT: LBB3_6: +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; WIN32-NEXT: movl %ecx, %esi +; WIN32-NEXT: andl $2139095040, %esi # imm = 0x7F800000 +; WIN32-NEXT: LBB3_8: +; WIN32-NEXT: shrl $23, %esi +; WIN32-NEXT: cmpl $8388608, %edx # imm = 0x800000 +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: fld %st(0) +; WIN32-NEXT: fmul %st(4), %st +; WIN32-NEXT: fstps {{[0-9]+}}(%esp) +; WIN32-NEXT: fsts {{[0-9]+}}(%esp) +; WIN32-NEXT: jae LBB3_10 +; WIN32-NEXT: # %bb.9: +; WIN32-NEXT: addl $-25, %esi +; WIN32-NEXT: LBB3_10: +; WIN32-NEXT: movl %esi, (%esp) # 4-byte Spill +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi +; WIN32-NEXT: movl %edi, %ebp +; WIN32-NEXT: andl $2147483647, %ebp # imm = 0x7FFFFFFF +; WIN32-NEXT: cmpl $8388608, %ebp # imm = 0x800000 +; WIN32-NEXT: jb LBB3_11 +; WIN32-NEXT: # %bb.12: +; WIN32-NEXT: movl %ebp, %esi +; WIN32-NEXT: jmp LBB3_13 +; WIN32-NEXT: LBB3_11: +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi +; WIN32-NEXT: movl %edi, %esi +; WIN32-NEXT: andl $2139095040, %esi # imm = 0x7F800000 +; WIN32-NEXT: LBB3_13: +; WIN32-NEXT: shrl $23, %esi +; WIN32-NEXT: cmpl $8388608, %ebp # imm = 0x800000 +; WIN32-NEXT: 
flds {{[0-9]+}}(%esp) +; WIN32-NEXT: fmul %st, %st(4) +; WIN32-NEXT: fxch %st(4) +; WIN32-NEXT: fstps {{[0-9]+}}(%esp) +; WIN32-NEXT: fxch %st(3) +; WIN32-NEXT: fsts {{[0-9]+}}(%esp) +; WIN32-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; WIN32-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; WIN32-NEXT: jae LBB3_15 +; WIN32-NEXT: # %bb.14: +; WIN32-NEXT: addl $-25, %esi +; WIN32-NEXT: LBB3_15: +; WIN32-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movl %eax, %edx +; WIN32-NEXT: andl $2147483647, %edx # imm = 0x7FFFFFFF +; WIN32-NEXT: cmpl $8388608, %edx # imm = 0x800000 +; WIN32-NEXT: jb LBB3_16 +; WIN32-NEXT: # %bb.17: +; WIN32-NEXT: movl %edx, %esi +; WIN32-NEXT: jmp LBB3_18 +; WIN32-NEXT: LBB3_16: +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movl %eax, %esi +; WIN32-NEXT: andl $2139095040, %esi # imm = 0x7F800000 +; WIN32-NEXT: LBB3_18: +; WIN32-NEXT: shrl $23, %esi +; WIN32-NEXT: cmpl $8388608, %edx # imm = 0x800000 +; WIN32-NEXT: jae LBB3_20 +; WIN32-NEXT: # %bb.19: +; WIN32-NEXT: addl $-25, %esi +; WIN32-NEXT: LBB3_20: +; WIN32-NEXT: andl $-2139095041, %eax # imm = 0x807FFFFF +; WIN32-NEXT: orl $1056964608, %eax # imm = 0x3F000000 +; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN32-NEXT: andl $-2139095041, %ebx # imm = 0x807FFFFF +; WIN32-NEXT: orl $1056964608, %ebx # imm = 0x3F000000 +; WIN32-NEXT: movl %ebx, {{[0-9]+}}(%esp) +; WIN32-NEXT: andl $-2139095041, %ecx # imm = 0x807FFFFF +; WIN32-NEXT: orl $1056964608, %ecx # imm = 0x3F000000 +; WIN32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN32-NEXT: andl $-2139095041, %edi # imm = 0x807FFFFF +; WIN32-NEXT: orl $1056964608, %edi # imm = 0x3F000000 +; WIN32-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN32-NEXT: addl $-126, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; WIN32-NEXT: addl $-126, (%esp) # 4-byte Folded Spill +; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; WIN32-NEXT: addl $-126, 
%ecx +; WIN32-NEXT: addl $-126, %esi +; WIN32-NEXT: addl $-2139095040, %edx # imm = 0x80800000 +; WIN32-NEXT: xorl %edi, %edi +; WIN32-NEXT: cmpl $-2139095040, %edx # imm = 0x80800000 +; WIN32-NEXT: movl $0, %ebx +; WIN32-NEXT: jbe LBB3_22 +; WIN32-NEXT: # %bb.21: +; WIN32-NEXT: movl %esi, %ebx +; WIN32-NEXT: LBB3_22: +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; WIN32-NEXT: jbe LBB3_24 +; WIN32-NEXT: # %bb.23: +; WIN32-NEXT: fstp %st(1) +; WIN32-NEXT: fldz +; WIN32-NEXT: LBB3_24: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: addl $-2139095040, %eax # imm = 0x80800000 +; WIN32-NEXT: cmpl $-2139095040, %eax # imm = 0x80800000 +; WIN32-NEXT: movl $0, %edx +; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; WIN32-NEXT: jbe LBB3_26 +; WIN32-NEXT: # %bb.25: +; WIN32-NEXT: movl %ecx, %edx +; WIN32-NEXT: LBB3_26: +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: jbe LBB3_28 +; WIN32-NEXT: # %bb.27: +; WIN32-NEXT: fstp %st(3) +; WIN32-NEXT: fldz +; WIN32-NEXT: LBB3_28: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; WIN32-NEXT: addl $-2139095040, %eax # imm = 0x80800000 +; WIN32-NEXT: cmpl $-2139095040, %eax # imm = 0x80800000 +; WIN32-NEXT: movl $0, %ecx +; WIN32-NEXT: jbe LBB3_30 +; WIN32-NEXT: # %bb.29: +; WIN32-NEXT: movl (%esp), %ecx # 4-byte Reload +; WIN32-NEXT: LBB3_30: +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: jbe LBB3_32 +; WIN32-NEXT: # %bb.31: +; WIN32-NEXT: fstp %st(2) +; WIN32-NEXT: fldz +; WIN32-NEXT: LBB3_32: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: addl $-2139095040, %ebp # imm = 0x80800000 +; WIN32-NEXT: cmpl $-2139095040, %ebp # imm = 0x80800000 +; WIN32-NEXT: jbe LBB3_34 +; WIN32-NEXT: # %bb.33: +; WIN32-NEXT: movl %esi, %edi +; WIN32-NEXT: LBB3_34: +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: jbe LBB3_36 +; WIN32-NEXT: # %bb.35: +; WIN32-NEXT: fstp %st(4) +; 
WIN32-NEXT: fldz +; WIN32-NEXT: LBB3_36: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: movl %edi, 28(%eax) +; WIN32-NEXT: movl %ecx, 24(%eax) +; WIN32-NEXT: movl %edx, 20(%eax) +; WIN32-NEXT: movl %ebx, 16(%eax) +; WIN32-NEXT: fxch %st(3) +; WIN32-NEXT: fstps 12(%eax) +; WIN32-NEXT: fstps 8(%eax) +; WIN32-NEXT: fstps 4(%eax) +; WIN32-NEXT: fstps (%eax) +; WIN32-NEXT: addl $68, %esp +; WIN32-NEXT: popl %esi +; WIN32-NEXT: popl %edi +; WIN32-NEXT: popl %ebx +; WIN32-NEXT: popl %ebp +; WIN32-NEXT: retl + %result = call { <4 x float>, <4 x i32> } @llvm.frexp.v4f32.v4i32(<4 x float> %a) + ret { <4 x float>, <4 x i32> } %result +} + +define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) { +; X64-LABEL: test_frexp_v4f32_v4i32_only_use_fract: +; X64: # %bb.0: +; X64-NEXT: subq $72, %rsp +; X64-NEXT: .cfi_def_cfa_offset 80 +; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: callq frexpf@PLT +; X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: callq frexpf@PLT +; X64-NEXT: unpcklps (%rsp), %xmm0 # 16-byte Folded Reload +; X64-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] +; X64-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: callq frexpf@PLT +; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: callq frexpf@PLT +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload +; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X64-NEXT: unpcklpd (%rsp), 
%xmm1 # 16-byte Folded Reload +; X64-NEXT: # xmm1 = xmm1[0],mem[0] +; X64-NEXT: movaps %xmm1, %xmm0 +; X64-NEXT: addq $72, %rsp +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; +; WIN32-LABEL: test_frexp_v4f32_v4i32_only_use_fract: +; WIN32: # %bb.0: +; WIN32-NEXT: pushl %edi +; WIN32-NEXT: pushl %esi +; WIN32-NEXT: subl $48, %esp +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: fsts {{[0-9]+}}(%esp) +; WIN32-NEXT: flds __real@4c000000 +; WIN32-NEXT: fld %st(1) +; WIN32-NEXT: fmul %st(1), %st +; WIN32-NEXT: fstps {{[0-9]+}}(%esp) +; WIN32-NEXT: fxch %st(2) +; WIN32-NEXT: fsts {{[0-9]+}}(%esp) +; WIN32-NEXT: fld %st(0) +; WIN32-NEXT: fmul %st(3), %st +; WIN32-NEXT: fstps {{[0-9]+}}(%esp) +; WIN32-NEXT: fxch %st(3) +; WIN32-NEXT: fsts {{[0-9]+}}(%esp) +; WIN32-NEXT: fld %st(0) +; WIN32-NEXT: fmul %st(3), %st +; WIN32-NEXT: fstps {{[0-9]+}}(%esp) +; WIN32-NEXT: fxch %st(4) +; WIN32-NEXT: fsts (%esp) +; WIN32-NEXT: fmul %st, %st(2) +; WIN32-NEXT: fxch %st(2) +; WIN32-NEXT: fstps {{[0-9]+}}(%esp) +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; WIN32-NEXT: movl %ecx, %eax +; WIN32-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF +; WIN32-NEXT: cmpl $8388608, %eax # imm = 0x800000 +; WIN32-NEXT: jae LBB4_2 +; WIN32-NEXT: # %bb.1: +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ecx +; WIN32-NEXT: LBB4_2: +; WIN32-NEXT: andl $-2139095041, %ecx # imm = 0x807FFFFF +; WIN32-NEXT: orl $1056964608, %ecx # imm = 0x3F000000 +; WIN32-NEXT: movl %ecx, {{[0-9]+}}(%esp) +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx +; WIN32-NEXT: movl %edx, %ecx +; WIN32-NEXT: andl $2147483647, %ecx # imm = 0x7FFFFFFF +; WIN32-NEXT: cmpl $8388608, %ecx # imm = 0x800000 +; WIN32-NEXT: jae LBB4_4 +; WIN32-NEXT: # %bb.3: +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx +; WIN32-NEXT: LBB4_4: +; WIN32-NEXT: andl $-2139095041, %edx # imm = 0x807FFFFF +; WIN32-NEXT: orl $1056964608, %edx # imm = 0x3F000000 +; 
WIN32-NEXT: movl %edx, {{[0-9]+}}(%esp) +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi +; WIN32-NEXT: movl %esi, %edx +; WIN32-NEXT: andl $2147483647, %edx # imm = 0x7FFFFFFF +; WIN32-NEXT: cmpl $8388608, %edx # imm = 0x800000 +; WIN32-NEXT: jae LBB4_6 +; WIN32-NEXT: # %bb.5: +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi +; WIN32-NEXT: LBB4_6: +; WIN32-NEXT: andl $-2139095041, %esi # imm = 0x807FFFFF +; WIN32-NEXT: orl $1056964608, %esi # imm = 0x3F000000 +; WIN32-NEXT: movl %esi, {{[0-9]+}}(%esp) +; WIN32-NEXT: movl (%esp), %edi +; WIN32-NEXT: movl %edi, %esi +; WIN32-NEXT: andl $2147483647, %esi # imm = 0x7FFFFFFF +; WIN32-NEXT: cmpl $8388608, %esi # imm = 0x800000 +; WIN32-NEXT: jae LBB4_8 +; WIN32-NEXT: # %bb.7: +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi +; WIN32-NEXT: LBB4_8: +; WIN32-NEXT: andl $-2139095041, %edi # imm = 0x807FFFFF +; WIN32-NEXT: orl $1056964608, %edi # imm = 0x3F000000 +; WIN32-NEXT: movl %edi, {{[0-9]+}}(%esp) +; WIN32-NEXT: addl $-2139095040, %eax # imm = 0x80800000 +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: cmpl $-2139095040, %eax # imm = 0x80800000 +; WIN32-NEXT: jbe LBB4_10 +; WIN32-NEXT: # %bb.9: +; WIN32-NEXT: fstp %st(1) +; WIN32-NEXT: fldz +; WIN32-NEXT: LBB4_10: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: addl $-2139095040, %ecx # imm = 0x80800000 +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: cmpl $-2139095040, %ecx # imm = 0x80800000 +; WIN32-NEXT: jbe LBB4_12 +; WIN32-NEXT: # %bb.11: +; WIN32-NEXT: fstp %st(3) +; WIN32-NEXT: fldz +; WIN32-NEXT: LBB4_12: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: addl $-2139095040, %edx # imm = 0x80800000 +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: cmpl $-2139095040, %edx # imm = 0x80800000 +; WIN32-NEXT: jbe LBB4_14 +; WIN32-NEXT: # %bb.13: +; WIN32-NEXT: fstp %st(4) +; WIN32-NEXT: fldz +; WIN32-NEXT: LBB4_14: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: addl $-2139095040, %esi # imm = 0x80800000 +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; 
WIN32-NEXT: cmpl $-2139095040, %esi # imm = 0x80800000 +; WIN32-NEXT: jbe LBB4_16 +; WIN32-NEXT: # %bb.15: +; WIN32-NEXT: fstp %st(2) +; WIN32-NEXT: fldz +; WIN32-NEXT: LBB4_16: +; WIN32-NEXT: fstp %st(0) +; WIN32-NEXT: fxch %st(1) +; WIN32-NEXT: fstps 12(%eax) +; WIN32-NEXT: fxch %st(2) +; WIN32-NEXT: fstps 8(%eax) +; WIN32-NEXT: fstps 4(%eax) +; WIN32-NEXT: fstps (%eax) +; WIN32-NEXT: addl $48, %esp +; WIN32-NEXT: popl %esi +; WIN32-NEXT: popl %edi +; WIN32-NEXT: retl + %result = call { <4 x float>, <4 x i32> } @llvm.frexp.v4f32.v4i32(<4 x float> %a) + %result.0 = extractvalue { <4 x float>, <4 x i32> } %result, 0 + ret <4 x float> %result.0 +} + +define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) { +; X64-LABEL: test_frexp_v4f32_v4i32_only_use_exp: +; X64: # %bb.0: +; X64-NEXT: subq $40, %rsp +; X64-NEXT: .cfi_def_cfa_offset 48 +; X64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] +; X64-NEXT: movq %rsp, %rdi +; X64-NEXT: callq frexpf@PLT +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: callq frexpf@PLT +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: callq frexpf@PLT +; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload +; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: callq frexpf@PLT +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero +; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; X64-NEXT: addq $40, 
%rsp +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; +; WIN32-LABEL: test_frexp_v4f32_v4i32_only_use_exp: +; WIN32: # %bb.0: +; WIN32-NEXT: pushl %ebp +; WIN32-NEXT: pushl %ebx +; WIN32-NEXT: pushl %edi +; WIN32-NEXT: pushl %esi +; WIN32-NEXT: subl $36, %esp +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: flds __real@4c000000 +; WIN32-NEXT: fld %st(1) +; WIN32-NEXT: fmul %st(1), %st +; WIN32-NEXT: fstps {{[0-9]+}}(%esp) +; WIN32-NEXT: fxch %st(1) +; WIN32-NEXT: fstps {{[0-9]+}}(%esp) +; WIN32-NEXT: movl $2139095040, %ecx # imm = 0x7F800000 +; WIN32-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: andl %edx, %eax +; WIN32-NEXT: cmpl $8388608, %eax # imm = 0x800000 +; WIN32-NEXT: jb LBB5_1 +; WIN32-NEXT: # %bb.2: +; WIN32-NEXT: movl %eax, %esi +; WIN32-NEXT: jmp LBB5_3 +; WIN32-NEXT: LBB5_1: +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi +; WIN32-NEXT: andl %ecx, %esi +; WIN32-NEXT: LBB5_3: +; WIN32-NEXT: shrl $23, %esi +; WIN32-NEXT: cmpl $8388608, %eax # imm = 0x800000 +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: fld %st(0) +; WIN32-NEXT: fmul %st(2), %st +; WIN32-NEXT: fstps {{[0-9]+}}(%esp) +; WIN32-NEXT: fstps {{[0-9]+}}(%esp) +; WIN32-NEXT: jae LBB5_5 +; WIN32-NEXT: # %bb.4: +; WIN32-NEXT: addl $-25, %esi +; WIN32-NEXT: LBB5_5: +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edi +; WIN32-NEXT: andl %edx, %edi +; WIN32-NEXT: cmpl $8388608, %edi # imm = 0x800000 +; WIN32-NEXT: jb LBB5_6 +; WIN32-NEXT: # %bb.7: +; WIN32-NEXT: movl %edi, %ebx +; WIN32-NEXT: jmp LBB5_8 +; WIN32-NEXT: LBB5_6: +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; WIN32-NEXT: andl %ecx, %ebx +; WIN32-NEXT: LBB5_8: +; WIN32-NEXT: shrl $23, %ebx +; WIN32-NEXT: cmpl $8388608, %edi # imm = 0x800000 +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: fld %st(0) +; WIN32-NEXT: fmul %st(2), %st +; WIN32-NEXT: fstps {{[0-9]+}}(%esp) +; WIN32-NEXT: fstps {{[0-9]+}}(%esp) +; WIN32-NEXT: jae LBB5_10 +; WIN32-NEXT: # %bb.9: +; WIN32-NEXT: addl 
$-25, %ebx +; WIN32-NEXT: LBB5_10: +; WIN32-NEXT: movl %esi, (%esp) # 4-byte Spill +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp +; WIN32-NEXT: andl %edx, %ebp +; WIN32-NEXT: cmpl $8388608, %ebp # imm = 0x800000 +; WIN32-NEXT: jb LBB5_11 +; WIN32-NEXT: # %bb.12: +; WIN32-NEXT: movl %ebp, %esi +; WIN32-NEXT: jmp LBB5_13 +; WIN32-NEXT: LBB5_11: +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi +; WIN32-NEXT: andl %ecx, %esi +; WIN32-NEXT: LBB5_13: +; WIN32-NEXT: shrl $23, %esi +; WIN32-NEXT: cmpl $8388608, %ebp # imm = 0x800000 +; WIN32-NEXT: flds {{[0-9]+}}(%esp) +; WIN32-NEXT: fmul %st, %st(1) +; WIN32-NEXT: fxch %st(1) +; WIN32-NEXT: fstps {{[0-9]+}}(%esp) +; WIN32-NEXT: fstps {{[0-9]+}}(%esp) +; WIN32-NEXT: jae LBB5_15 +; WIN32-NEXT: # %bb.14: +; WIN32-NEXT: addl $-25, %esi +; WIN32-NEXT: LBB5_15: +; WIN32-NEXT: andl {{[0-9]+}}(%esp), %edx +; WIN32-NEXT: cmpl $8388608, %edx # imm = 0x800000 +; WIN32-NEXT: jb LBB5_16 +; WIN32-NEXT: # %bb.17: +; WIN32-NEXT: movl %edx, %ecx +; WIN32-NEXT: jmp LBB5_18 +; WIN32-NEXT: LBB5_16: +; WIN32-NEXT: andl {{[0-9]+}}(%esp), %ecx +; WIN32-NEXT: LBB5_18: +; WIN32-NEXT: shrl $23, %ecx +; WIN32-NEXT: cmpl $8388608, %edx # imm = 0x800000 +; WIN32-NEXT: jae LBB5_20 +; WIN32-NEXT: # %bb.19: +; WIN32-NEXT: addl $-25, %ecx +; WIN32-NEXT: LBB5_20: +; WIN32-NEXT: addl $-2139095040, %eax # imm = 0x80800000 +; WIN32-NEXT: cmpl $-2139095040, %eax # imm = 0x80800000 +; WIN32-NEXT: movl $0, %eax +; WIN32-NEXT: jbe LBB5_22 +; WIN32-NEXT: # %bb.21: +; WIN32-NEXT: movl (%esp), %eax # 4-byte Reload +; WIN32-NEXT: addl $-126, %eax +; WIN32-NEXT: LBB5_22: +; WIN32-NEXT: movl %eax, (%esp) # 4-byte Spill +; WIN32-NEXT: addl $-2139095040, %edi # imm = 0x80800000 +; WIN32-NEXT: cmpl $-2139095040, %edi # imm = 0x80800000 +; WIN32-NEXT: movl $0, %edi +; WIN32-NEXT: jbe LBB5_24 +; WIN32-NEXT: # %bb.23: +; WIN32-NEXT: addl $-126, %ebx +; WIN32-NEXT: movl %ebx, %edi +; WIN32-NEXT: LBB5_24: +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: addl $-2139095040, 
%ebp # imm = 0x80800000 +; WIN32-NEXT: cmpl $-2139095040, %ebp # imm = 0x80800000 +; WIN32-NEXT: movl $0, %ebx +; WIN32-NEXT: jbe LBB5_26 +; WIN32-NEXT: # %bb.25: +; WIN32-NEXT: addl $-126, %esi +; WIN32-NEXT: movl %esi, %ebx +; WIN32-NEXT: LBB5_26: +; WIN32-NEXT: addl $-2139095040, %edx # imm = 0x80800000 +; WIN32-NEXT: cmpl $-2139095040, %edx # imm = 0x80800000 +; WIN32-NEXT: movl $0, %edx +; WIN32-NEXT: jbe LBB5_28 +; WIN32-NEXT: # %bb.27: +; WIN32-NEXT: addl $-126, %ecx +; WIN32-NEXT: movl %ecx, %edx +; WIN32-NEXT: LBB5_28: +; WIN32-NEXT: movl %edx, 12(%eax) +; WIN32-NEXT: movl %ebx, 8(%eax) +; WIN32-NEXT: movl %edi, 4(%eax) +; WIN32-NEXT: movl (%esp), %ecx # 4-byte Reload +; WIN32-NEXT: movl %ecx, (%eax) +; WIN32-NEXT: addl $36, %esp +; WIN32-NEXT: popl %esi +; WIN32-NEXT: popl %edi +; WIN32-NEXT: popl %ebx +; WIN32-NEXT: popl %ebp +; WIN32-NEXT: retl + %result = call { <4 x float>, <4 x i32> } @llvm.frexp.v4f32.v4i32(<4 x float> %a) + %result.1 = extractvalue { <4 x float>, <4 x i32> } %result, 1 + ret <4 x i32> %result.1 +} + +define { double, i32 } @test_frexp_f64_i32(double %a) { +; X64-LABEL: test_frexp_f64_i32: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: callq frexp@PLT +; X64-NEXT: movl {{[0-9]+}}(%rsp), %eax +; X64-NEXT: popq %rcx +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; +; WIN32-LABEL: test_frexp_f64_i32: +; WIN32: # %bb.0: +; WIN32-NEXT: subl $16, %esp +; WIN32-NEXT: fldl {{[0-9]+}}(%esp) +; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN32-NEXT: fstpl (%esp) +; WIN32-NEXT: calll _frexp +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: addl $16, %esp +; WIN32-NEXT: retl + %result = call { double, i32 } @llvm.frexp.f64.i32(double %a) + ret { double, i32 } %result +} + +define double @test_frexp_f64_i32_only_use_fract(double %a) { +; X64-LABEL: test_frexp_f64_i32_only_use_fract: +; X64: # %bb.0: +; 
X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: callq frexp@PLT +; X64-NEXT: popq %rax +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; +; WIN32-LABEL: test_frexp_f64_i32_only_use_fract: +; WIN32: # %bb.0: +; WIN32-NEXT: subl $16, %esp +; WIN32-NEXT: fldl {{[0-9]+}}(%esp) +; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN32-NEXT: fstpl (%esp) +; WIN32-NEXT: calll _frexp +; WIN32-NEXT: addl $16, %esp +; WIN32-NEXT: retl + %result = call { double, i32 } @llvm.frexp.f64.i32(double %a) + %result.0 = extractvalue { double, i32 } %result, 0 + ret double %result.0 +} + +define i32 @test_frexp_f64_i32_only_use_exp(double %a) { +; X64-LABEL: test_frexp_f64_i32_only_use_exp: +; X64: # %bb.0: +; X64-NEXT: pushq %rax +; X64-NEXT: .cfi_def_cfa_offset 16 +; X64-NEXT: leaq {{[0-9]+}}(%rsp), %rdi +; X64-NEXT: callq frexp@PLT +; X64-NEXT: movl {{[0-9]+}}(%rsp), %eax +; X64-NEXT: popq %rcx +; X64-NEXT: .cfi_def_cfa_offset 8 +; X64-NEXT: retq +; +; WIN32-LABEL: test_frexp_f64_i32_only_use_exp: +; WIN32: # %bb.0: +; WIN32-NEXT: subl $16, %esp +; WIN32-NEXT: fldl {{[0-9]+}}(%esp) +; WIN32-NEXT: leal {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: movl %eax, {{[0-9]+}}(%esp) +; WIN32-NEXT: fstpl (%esp) +; WIN32-NEXT: calll _frexp +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: addl $16, %esp +; WIN32-NEXT: retl + %result = call { double, i32 } @llvm.frexp.f64.i32(double %a) + %result.1 = extractvalue { double, i32 } %result, 1 + ret i32 %result.1 +} + +; FIXME: Widen vector result +; define { <2 x double>, <2 x i32> } @test_frexp_v2f64_v2i32(<2 x double> %a) { +; %result = call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> %a) +; ret { <2 x double>, <2 x i32> } %result +; } + +; define <2 x double> @test_frexp_v2f64_v2i32_only_use_fract(<2 x double> %a) { +; %result = call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> %a) +; %result.0 = 
extractvalue { <2 x double>, <2 x i32> } %result, 0 +; ret <2 x double> %result.0 +; } + +; define <2 x i32> @test_frexp_v2f64_v2i32_only_use_exp(<2 x double> %a) { +; %result = call { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double> %a) +; %result.1 = extractvalue { <2 x double>, <2 x i32> } %result, 1 +; ret <2 x i32> %result.1 +; } + +declare { float, i32 } @llvm.frexp.f32.i32(float) #0 +declare { <2 x float>, <2 x i32> } @llvm.frexp.v2f32.v2i32(<2 x float>) #0 +declare { <4 x float>, <4 x i32> } @llvm.frexp.v4f32.v4i32(<4 x float>) #0 + +declare { half, i32 } @llvm.frexp.f16.i32(half) #0 +declare { <2 x half>, <2 x i32> } @llvm.frexp.v2f16.v2i32(<2 x half>) #0 + +declare { double, i32 } @llvm.frexp.f64.i32(double) #0 +declare { <2 x double>, <2 x i32> } @llvm.frexp.v2f64.v2i32(<2 x double>) #0 + +declare { half, i16 } @llvm.frexp.f16.i16(half) #0 +declare { <2 x half>, <2 x i16> } @llvm.frexp.v2f16.v2i16(<2 x half>) #0 + +attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }