diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -310,6 +310,13 @@
   STRICT_FP_TO_SINT,
   STRICT_FP_TO_UINT,
 
+  /// STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a
+  /// floating point value. These have the same semantics as sitofp and
+  /// uitofp in IR. They are used to limit optimizations while the DAG is
+  /// being optimized.
+  STRICT_SINT_TO_FP,
+  STRICT_UINT_TO_FP,
+
   /// X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating
   /// point type down to the precision of the destination VT.  TRUNC is a
   /// flag, which is always an integer that is zero or one.  If TRUNC is 0,
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -789,6 +789,11 @@
   /// float type VT, by either extending or rounding (by truncation).
   SDValue getFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT);
 
+  /// Convert Op, which must be a STRICT operation of float type, to the
+  /// float type VT, by either extending or rounding (by truncation).
+  SDValue getStrictFPExtendOrRound(SDValue Op, const SDLoc &DL, EVT VT);
+
   /// Convert Op, which must be of integer type, to the
   /// integer type VT, by either any-extending or truncating it.
   SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT);
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -715,6 +715,8 @@
     case ISD::STRICT_FTRUNC:
     case ISD::STRICT_FP_TO_SINT:
     case ISD::STRICT_FP_TO_UINT:
+    case ISD::STRICT_SINT_TO_FP:
+    case ISD::STRICT_UINT_TO_FP:
     case ISD::STRICT_FP_ROUND:
     case ISD::STRICT_FP_EXTEND:
       return true;
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -967,6 +967,8 @@
     case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break;
     case ISD::STRICT_FP_TO_SINT: EqOpc = ISD::FP_TO_SINT; break;
     case ISD::STRICT_FP_TO_UINT: EqOpc = ISD::FP_TO_UINT; break;
+    case ISD::STRICT_SINT_TO_FP: EqOpc = ISD::SINT_TO_FP; break;
+    case ISD::STRICT_UINT_TO_FP: EqOpc = ISD::UINT_TO_FP; break;
     case ISD::STRICT_FP_ROUND: EqOpc = ISD::FP_ROUND; break;
     case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break;
     }
@@ -4077,13 +4079,15 @@
   /// \param N Node to expand
   /// \param Result output after conversion
   /// \returns True, if the expansion was successful, false otherwise
-  bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain, SelectionDAG &DAG) const;
+  bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain,
+                        SelectionDAG &DAG) const;
 
   /// Expand UINT(i64) to double(f64) conversion
   /// \param N Node to expand
   /// \param Result output after conversion
   /// \returns True, if the expansion was successful, false otherwise
-  bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
+  bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain,
+                        SelectionDAG &DAG) const;
 
   /// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
   SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;
diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -261,6 +261,8 @@
     case Intrinsic::experimental_constrained_fma:
     case Intrinsic::experimental_constrained_fptosi:
    case Intrinsic::experimental_constrained_fptoui:
+    case Intrinsic::experimental_constrained_sitofp:
+    case Intrinsic::experimental_constrained_uitofp:
     case Intrinsic::experimental_constrained_fptrunc:
     case Intrinsic::experimental_constrained_fpext:
     case Intrinsic::experimental_constrained_sqrt:
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -640,6 +640,16 @@
                                                     [ llvm_anyfloat_ty,
                                                       llvm_metadata_ty ]>;
 
+  def int_experimental_constrained_sitofp : Intrinsic<[ llvm_anyfloat_ty ],
+                                                      [ llvm_anyint_ty,
+                                                        llvm_metadata_ty,
+                                                        llvm_metadata_ty ]>;
+
+  def int_experimental_constrained_uitofp : Intrinsic<[ llvm_anyfloat_ty ],
+                                                      [ llvm_anyint_ty,
+                                                        llvm_metadata_ty,
+                                                        llvm_metadata_ty ]>;
+
   def int_experimental_constrained_fptrunc : Intrinsic<[ llvm_anyfloat_ty ],
                                                        [ llvm_anyfloat_ty,
                                                          llvm_metadata_ty,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -170,7 +170,7 @@
                             SDValue NewIntValue) const;
   SDValue ExpandFCOPYSIGN(SDNode *Node) const;
   SDValue ExpandFABS(SDNode *Node) const;
-  SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0, EVT DestVT,
+  SDValue ExpandLegalINT_TO_FP(bool isSigned, SDNode *Node, EVT DestVT,
                                const SDLoc &dl);
   SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
                                 const SDLoc &dl);
@@ -1011,6 +1011,11 @@
     Action = TLI.getOperationAction(Node->getOpcode(),
                                     Node->getOperand(0).getValueType());
     break;
+  case ISD::STRICT_SINT_TO_FP:
+  case ISD::STRICT_UINT_TO_FP:
+    Action = TLI.getOperationAction(Node->getOpcode(),
+                                    Node->getOperand(1).getValueType());
+    break;
  case ISD::SIGN_EXTEND_INREG: {
    EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT();
    Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
@@ -2334,9 +2339,14 @@
 /// INT_TO_FP operation of the specified operand when the target requests that
 /// we expand it.  At this point, we know that the result and operand types are
 /// legal for the target.
-SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue Op0,
+SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDNode *Node,
                                                    EVT DestVT, const SDLoc &dl) {
+  SDValue Op0;
+  if (Node->isStrictFPOpcode())
+    Op0 = Node->getOperand(1);
+  else
+    Op0 = Node->getOperand(0);
   EVT SrcVT = Op0.getValueType();
 
   // TODO: Should any fast-math-flags be set for the created nodes?
@@ -2384,15 +2394,35 @@
                            BitsToDouble(0x4330000000000000ULL), dl, MVT::f64);
     // subtract the bias
-    SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
+    SDValue Sub;
+    if (Node->isStrictFPOpcode()) {
+      Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other},
+                        {Node->getOperand(0), Load, Bias});
+    } else
+      Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Load, Bias);
     // final result
-    SDValue Result = DAG.getFPExtendOrRound(Sub, dl, DestVT);
+    SDValue Result;
+    if (Node->isStrictFPOpcode()) {
+      if (!DestVT.bitsEq(Sub.getValueType()))
+        Result = DAG.getStrictFPExtendOrRound(Sub, dl, DestVT);
+      else
+        Result = Sub;
+      // Finally, relink the chain.
+      DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Result.getValue(1));
+    } else
+      Result = DAG.getFPExtendOrRound(Sub, dl, DestVT);
     return Result;
   }
   assert(!isSigned && "Legalize cannot Expand SINT_TO_FP for i64 yet");
   // Code below here assumes !isSigned without checking again.
 
-  SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
+  SDValue Tmp1;
+  if (Node->isStrictFPOpcode()) {
+    Tmp1 = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {DestVT, MVT::Other},
+                       {Node->getOperand(0), Op0});
+  } else
+    Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
 
   SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(SrcVT), Op0,
                                  DAG.getConstant(0, dl, SrcVT), ISD::SETLT);
@@ -2438,7 +2468,15 @@
     FudgeInReg = Handle.getValue();
   }
 
-  return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
+  if (Node->isStrictFPOpcode()) {
+    SDValue Result = DAG.getNode(ISD::STRICT_FADD, dl, {DestVT, MVT::Other},
+                                 {Tmp1.getValue(1), Tmp1, FudgeInReg});
+    // Relink the chain.
+    DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Result.getValue(1));
+    return Result;
+  } else
+    return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg);
 }
 
 /// This function is responsible for legalizing a
@@ -2873,14 +2911,30 @@
     break;
   }
   case ISD::UINT_TO_FP:
-    if (TLI.expandUINT_TO_FP(Node, Tmp1, DAG)) {
-      Results.push_back(Tmp1);
+  case ISD::STRICT_UINT_TO_FP:
+    if (TLI.expandUINT_TO_FP(Node, Tmp1, Tmp2, DAG)) {
+      if (Node->isStrictFPOpcode()) {
+        // Relink the chain.
+        DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2);
+        // Replace the new UINT result.
+        ReplaceNodeWithValue(SDValue(Node, 0), Tmp1);
+        LLVM_DEBUG(dbgs()
+                   << "Successfully expanded STRICT_UINT_TO_FP node\n");
+      } else
+        Results.push_back(Tmp1);
       break;
     }
     LLVM_FALLTHROUGH;
   case ISD::SINT_TO_FP:
-    Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP,
-                                Node->getOperand(0), Node->getValueType(0), dl);
+  case ISD::STRICT_SINT_TO_FP:
+    if (Node->isStrictFPOpcode()) {
+      Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::STRICT_SINT_TO_FP,
+                                  Node, Node->getValueType(0), dl);
+      ReplaceNode(Node, Tmp1.getNode());
+      LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_xINT_TO_FP node\n");
+      return true;
+    } else
+      Tmp1 = ExpandLegalINT_TO_FP(Node->getOpcode() == ISD::SINT_TO_FP,
+                                  Node, Node->getValueType(0), dl);
     Results.push_back(Tmp1);
     break;
   case ISD::FP_TO_SINT:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1205,7 +1205,8 @@
   // Attempt to expand using TargetLowering.
   SDValue Result;
-  if (TLI.expandUINT_TO_FP(Op.getNode(), Result, DAG))
+  SDValue Chain; // FIXME: The strict-FP chain is currently dropped here.
+  if (TLI.expandUINT_TO_FP(Op.getNode(), Result, Chain, DAG))
     return Result;
 
   // Make sure that the SINT_TO_FP and SRL instructions are available.
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -172,6 +172,8 @@
   case ISD::STRICT_FTRUNC:
   case ISD::STRICT_FP_TO_SINT:
   case ISD::STRICT_FP_TO_UINT:
+  case ISD::STRICT_UINT_TO_FP:
+  case ISD::STRICT_SINT_TO_FP:
   case ISD::STRICT_FP_EXTEND:
     R = ScalarizeVecRes_StrictFPOp(N);
     break;
@@ -914,8 +916,10 @@
   case ISD::FSQRT:
   case ISD::FTRUNC:
   case ISD::SINT_TO_FP:
+  case ISD::STRICT_SINT_TO_FP:
   case ISD::TRUNCATE:
   case ISD::UINT_TO_FP:
+  case ISD::STRICT_UINT_TO_FP:
   case ISD::FCANONICALIZE:
     SplitVecRes_UnaryOp(N, Lo, Hi);
     break;
@@ -2847,6 +2851,8 @@
   case ISD::STRICT_FP_ROUND:
   case ISD::STRICT_FP_TO_SINT:
   case ISD::STRICT_FP_TO_UINT:
+  case ISD::STRICT_UINT_TO_FP:
+  case ISD::STRICT_SINT_TO_FP:
     Res = WidenVecRes_Convert_StrictFP(N);
     break;
@@ -4166,7 +4172,9 @@
   case ISD::FP_TO_UINT:
   case ISD::STRICT_FP_TO_UINT:
   case ISD::SINT_TO_FP:
+  case ISD::STRICT_SINT_TO_FP:
   case ISD::UINT_TO_FP:
+  case ISD::STRICT_UINT_TO_FP:
   case ISD::TRUNCATE:
     Res = WidenVecOp_Convert(N);
     break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1101,6 +1101,15 @@
            : getNode(ISD::FP_ROUND, DL, VT, Op, getIntPtrConstant(0, DL));
 }
 
+SDValue SelectionDAG::getStrictFPExtendOrRound(SDValue Op, const SDLoc &DL,
+                                               EVT VT) {
+  SDValue Chain = SDValue(Op.getNode(), 1);
+
+  return VT.bitsGT(Op.getValueType())
+             ? getNode(ISD::STRICT_FP_EXTEND, DL, {VT, MVT::Other},
+                       {Chain, Op})
+             : getNode(ISD::STRICT_FP_ROUND, DL, {VT, MVT::Other},
+                       {Chain, Op, getIntPtrConstant(0, DL)});
+}
+
 SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
   return VT.bitsGT(Op.getValueType()) ?
     getNode(ISD::ANY_EXTEND, DL, VT, Op) :
@@ -7772,6 +7781,8 @@
   case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break;
   case ISD::STRICT_FP_TO_SINT: NewOpc = ISD::FP_TO_SINT; break;
   case ISD::STRICT_FP_TO_UINT: NewOpc = ISD::FP_TO_UINT; break;
+  case ISD::STRICT_SINT_TO_FP: NewOpc = ISD::SINT_TO_FP; break;
+  case ISD::STRICT_UINT_TO_FP: NewOpc = ISD::UINT_TO_FP; break;
   }
 
   assert(Node->getNumValues() == 2 && "Unexpected number of results!");
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6095,6 +6095,8 @@
   case Intrinsic::experimental_constrained_fma:
   case Intrinsic::experimental_constrained_fptosi:
   case Intrinsic::experimental_constrained_fptoui:
+  case Intrinsic::experimental_constrained_sitofp:
+  case Intrinsic::experimental_constrained_uitofp:
   case Intrinsic::experimental_constrained_fptrunc:
   case Intrinsic::experimental_constrained_fpext:
   case Intrinsic::experimental_constrained_sqrt:
@@ -6888,6 +6890,12 @@
   case Intrinsic::experimental_constrained_fptoui:
     Opcode = ISD::STRICT_FP_TO_UINT;
     break;
+  case Intrinsic::experimental_constrained_sitofp:
+    Opcode = ISD::STRICT_SINT_TO_FP;
+    break;
+  case Intrinsic::experimental_constrained_uitofp:
+    Opcode = ISD::STRICT_UINT_TO_FP;
+    break;
   case Intrinsic::experimental_constrained_fptrunc:
     Opcode = ISD::STRICT_FP_ROUND;
     break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -323,7 +323,9 @@
   case ISD::STRICT_FP_EXTEND:           return "strict_fp_extend";
   case ISD::SINT_TO_FP:                 return "sint_to_fp";
+  case ISD::STRICT_SINT_TO_FP:          return "strict_sint_to_fp";
   case ISD::UINT_TO_FP:                 return "uint_to_fp";
+  case ISD::STRICT_UINT_TO_FP:          return "strict_uint_to_fp";
   case ISD::FP_TO_SINT:                 return "fp_to_sint";
   case ISD::STRICT_FP_TO_SINT:          return "strict_fp_to_sint";
   case ISD::FP_TO_UINT:                 return "fp_to_uint";
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6027,8 +6027,13 @@
 }
 
 bool TargetLowering::expandUINT_TO_FP(SDNode *Node, SDValue &Result,
+                                      SDValue &Chain,
                                       SelectionDAG &DAG) const {
-  SDValue Src = Node->getOperand(0);
+  SDValue Src;
+  if (Node->isStrictFPOpcode())
+    Src = Node->getOperand(1);
+  else
+    Src = Node->getOperand(0);
   EVT SrcVT = Src.getValueType();
   EVT DstVT = Node->getValueType(0);
@@ -6059,8 +6064,18 @@
     SDValue And = DAG.getNode(ISD::AND, dl, SrcVT, Src, AndConst);
     SDValue Or = DAG.getNode(ISD::OR, dl, SrcVT, And, Shr);
 
-    SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
-    SDValue Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
+    SDValue Slow;
+    if (Node->isStrictFPOpcode()) {
+      SDValue SignCvt = DAG.getNode(ISD::STRICT_SINT_TO_FP, dl,
+                                    {DstVT, MVT::Other},
+                                    {Node->getOperand(0), Or});
+      Slow = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other},
+                         {SignCvt.getValue(1), SignCvt, SignCvt});
+      Chain = Slow.getValue(1);
+    } else {
+      SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Or);
+      Slow = DAG.getNode(ISD::FADD, dl, DstVT, SignCvt, SignCvt);
+    }
 
     // TODO: This really should be implemented using a branch rather than a
     // select.
     // We happen to get lucky and machinesink does the right
@@ -6103,8 +6118,18 @@
   SDValue HiOr = DAG.getNode(ISD::OR, dl, SrcVT, Hi, TwoP84);
   SDValue LoFlt = DAG.getBitcast(DstVT, LoOr);
   SDValue HiFlt = DAG.getBitcast(DstVT, HiOr);
-  SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt, TwoP84PlusTwoP52);
-  Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
+  if (Node->isStrictFPOpcode()) {
+    SDValue HiSub = DAG.getNode(ISD::STRICT_FSUB, dl, {DstVT, MVT::Other},
+                                {Node->getOperand(0), HiFlt,
+                                 TwoP84PlusTwoP52});
+    Result = DAG.getNode(ISD::STRICT_FADD, dl, {DstVT, MVT::Other},
+                         {HiSub.getValue(1), LoFlt, HiSub});
+    Chain = Result.getValue(1);
+  } else {
+    SDValue HiSub = DAG.getNode(ISD::FSUB, dl, DstVT, HiFlt,
+                                TwoP84PlusTwoP52);
+    Result = DAG.getNode(ISD::FADD, dl, DstVT, LoFlt, HiSub);
+  }
   return true;
 }
 
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -725,6 +725,8 @@
     setOperationAction(ISD::STRICT_FP_EXTEND, VT, Expand);
     setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Expand);
     setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Expand);
+    setOperationAction(ISD::STRICT_SINT_TO_FP, VT, Expand);
+    setOperationAction(ISD::STRICT_UINT_TO_FP, VT, Expand);
 
     // For most targets @llvm.get.dynamic.area.offset just returns 0.
     setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -190,6 +190,8 @@
     return false;
   case Intrinsic::experimental_constrained_fptosi:
   case Intrinsic::experimental_constrained_fptoui:
+  case Intrinsic::experimental_constrained_sitofp:
+  case Intrinsic::experimental_constrained_uitofp:
   case Intrinsic::experimental_constrained_fptrunc:
   case Intrinsic::experimental_constrained_fpext:
   case Intrinsic::experimental_constrained_sqrt:
diff --git a/llvm/test/CodeGen/X86/fp-intrinsics.ll b/llvm/test/CodeGen/X86/fp-intrinsics.ll
--- a/llvm/test/CodeGen/X86/fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/fp-intrinsics.ll
@@ -374,7 +374,7 @@
 ; CHECK-LABEL: f26
 ; COMMON: jmp llrintf
-define i64 @f26(float %x) {
+define i64 @f26(float %x) #0 {
 entry:
   %result = call i64 @llvm.experimental.constrained.llrint.i64.f32(float %x,
                                                metadata !"round.dynamic",
@@ -418,6 +418,28 @@
   ret i64 %result
 }
 
+; Verify that sitofp(42) isn't simplified when the rounding mode is unknown.
+; CHECK-LABEL: @f31
+; CHECK: call double @llvm.experimental.constrained.sitofp
+define double @f31() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 42,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+; Verify that uitofp(42) isn't simplified when the rounding mode is unknown.
+; CHECK-LABEL: @f32
+; CHECK: call double @llvm.experimental.constrained.uitofp
+define double @f32() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 42,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
 attributes #0 = { strictfp }
 
 @llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
@@ -452,3 +474,5 @@
 declare i32 @llvm.experimental.constrained.lround.i32.f32(float, metadata)
 declare i64 @llvm.experimental.constrained.llround.i64.f64(double, metadata)
 declare i64 @llvm.experimental.constrained.llround.i64.f32(float, metadata)
+declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata)
+declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -5490,6 +5490,514 @@
   ret <3 x double> %trunc
 }
 
+define <1 x double> @constrained_vector_sitofp_v1f64_v1i32(<1 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_sitofp_v1f64_v1i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorl $-2147483648, %edi # imm = 0x80000000
+; CHECK-NEXT:    movl %edi, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    subsd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_sitofp_v1f64_v1i32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    xorl $-2147483648, %edi # imm = 0x80000000
+; AVX-NEXT:    movl %edi, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vsubsd {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call <1 x double>
+           @llvm.experimental.constrained.sitofp.v1f64.v1i32(<1 x i32> %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret <1 x double> %result
+}
+
+define <1 x float> @constrained_vector_sitofp_v1f32_v1i32(<1 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_sitofp_v1f32_v1i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorl $-2147483648, %edi # imm = 0x80000000
+; CHECK-NEXT:    movl %edi, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    subsd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    cvtsd2ss %xmm0, %xmm0
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_sitofp_v1f32_v1i32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    xorl $-2147483648, %edi # imm = 0x80000000
+; AVX-NEXT:    movl %edi, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vsubsd {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call <1 x float>
+           @llvm.experimental.constrained.sitofp.v1f32.v1i32(<1 x i32> %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret <1 x float> %result
+}
+
+define <2 x double> @constrained_vector_sitofp_v2f64_v2i32(<2 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_sitofp_v2f64_v2i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movd %xmm0, %eax
+; CHECK-NEXT:    xorl $-2147483648, %eax # imm = 0x80000000
+; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; CHECK-NEXT:    movd %xmm0, %eax
+; CHECK-NEXT:    xorl $-2147483648, %eax # imm = 0x80000000
+; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT:    subsd %xmm1, %xmm0
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT:    subsd %xmm1, %xmm2
+; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_sitofp_v2f64_v2i32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vpextrd $1, %xmm0, %eax
+; AVX-NEXT:    xorl $-2147483648, %eax # imm = 0x80000000
+; AVX-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    xorl $-2147483648, %eax # imm = 0x80000000
+; AVX-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
+; AVX-NEXT:    vsubsd %xmm1, %xmm2, %xmm1
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    retq
+entry:
+  %result = call <2 x double>
+           @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32> %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret <2 x double> %result
+}
+
+define <2 x float> @constrained_vector_sitofp_v2f32_v2i32(<2 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_sitofp_v2f32_v2i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movd %xmm0, %eax
+; CHECK-NEXT:    xorl $-2147483648, %eax # imm = 0x80000000
+; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; CHECK-NEXT:    movd %xmm0, %eax
+; CHECK-NEXT:    xorl $-2147483648, %eax # imm = 0x80000000
+; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT:    subsd %xmm1, %xmm0
+; CHECK-NEXT:    cvtsd2ss %xmm0, %xmm0
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT:    subsd %xmm1, %xmm2
+; CHECK-NEXT:    xorps %xmm1, %xmm1
+; CHECK-NEXT:    cvtsd2ss %xmm2, %xmm1
+; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_sitofp_v2f32_v2i32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vpextrd $1, %xmm0, %eax
+; AVX-NEXT:    xorl $-2147483648, %eax # imm = 0x80000000
+; AVX-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    xorl $-2147483648, %eax # imm = 0x80000000
+; AVX-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
+; AVX-NEXT:    vsubsd %xmm1, %xmm2, %xmm1
+; AVX-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; AVX-NEXT:    retq
+entry:
+  %result = call <2 x float>
+           @llvm.experimental.constrained.sitofp.v2f32.v2i32(<2 x i32> %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret <2 x float> %result
+}
+
+define <3 x double> @constrained_vector_sitofp_v3f64_v3i32(<3 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_sitofp_v3f64_v3i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movd %xmm0, %eax
+; CHECK-NEXT:    xorl $-2147483648, %eax # imm = 0x80000000
+; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; CHECK-NEXT:    movd %xmm1, %eax
+; CHECK-NEXT:    xorl $-2147483648, %eax # imm = 0x80000000
+; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT:    movd %xmm0, %eax
+; CHECK-NEXT:    xorl $-2147483648, %eax # imm = 0x80000000
+; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT:    subsd %xmm2, %xmm0
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT:    subsd %xmm2, %xmm1
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm3 = mem[0],zero
+; CHECK-NEXT:    subsd %xmm2, %xmm3
+; CHECK-NEXT:    movsd %xmm3, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_sitofp_v3f64_v3i32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vpextrd $1, %xmm0, %eax
+; AVX-NEXT:    xorl $-2147483648, %eax # imm = 0x80000000
+; AVX-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    xorl $-2147483648, %eax # imm = 0x80000000
+; AVX-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    vpextrd $2, %xmm0, %eax
+; AVX-NEXT:    xorl $-2147483648, %eax # imm = 0x80000000
+; AVX-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
+; AVX-NEXT:    vsubsd %xmm1, %xmm2, %xmm2
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
+; AVX-NEXT:    vsubsd %xmm1, %xmm2, %xmm1
+; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT:    retq
+entry:
+  %result = call <3 x double>
+           @llvm.experimental.constrained.sitofp.v3f64.v3i32(<3 x i32> %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret <3 x double> %result
+}
+
+define <3 x float> @constrained_vector_sitofp_v3f32_v3i32(<3 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_sitofp_v3f32_v3i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movd %xmm0, %eax
+; CHECK-NEXT:    xorl $-2147483648, %eax # imm = 0x80000000
+; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; CHECK-NEXT:    movd %xmm1, %eax
+; CHECK-NEXT:    xorl $-2147483648, %eax # imm = 0x80000000
+; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT:    movd %xmm0, %eax
+; CHECK-NEXT:    xorl $-2147483648, %eax # imm = 0x80000000
+; CHECK-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT:    subsd %xmm1, %xmm0
+; CHECK-NEXT:    cvtsd2ss %xmm0, %xmm0
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT:    subsd %xmm1, %xmm2
+; CHECK-NEXT:    cvtsd2ss %xmm2, %xmm2
+; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT:    subsd %xmm1, %xmm2
+; CHECK-NEXT:    xorps %xmm1, %xmm1
+; CHECK-NEXT:    cvtsd2ss %xmm2, %xmm1
+; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_sitofp_v3f32_v3i32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vpextrd $1, %xmm0, %eax
+; AVX-NEXT:    xorl $-2147483648, %eax # imm = 0x80000000
+; AVX-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    xorl $-2147483648, %eax # imm = 0x80000000
+; AVX-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    vpextrd $2, %xmm0, %eax
+; AVX-NEXT:    xorl $-2147483648, %eax # imm = 0x80000000
+; AVX-NEXT:    movl %eax, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
+; AVX-NEXT:    vsubsd %xmm1, %xmm2, %xmm2
+; AVX-NEXT:    vcvtsd2ss %xmm2, %xmm2, %xmm2
+; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3]
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
+; AVX-NEXT:    vsubsd %xmm1, %xmm2, %xmm1
+; AVX-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
+; AVX-NEXT:    retq
+entry:
+  %result = call <3 x float>
+           @llvm.experimental.constrained.sitofp.v3f32.v3i32(<3 x i32> %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret <3 x float> %result
+}
+
+define <1 x double> @constrained_vector_uitofp_v1f64_v1i32(<1 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_uitofp_v1f64_v1i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edi, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    subsd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_uitofp_v1f64_v1i32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    movl %edi, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vsubsd {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call <1 x double>
+           @llvm.experimental.constrained.uitofp.v1f64.v1i32(<1 x i32> %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret <1 x double> %result
+}
+
+define <1 x float> @constrained_vector_uitofp_v1f32_v1i32(<1 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_uitofp_v1f32_v1i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movl %edi, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    subsd {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    cvtsd2ss %xmm0, %xmm0
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_uitofp_v1f32_v1i32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    movl %edi, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vsubsd {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    retq
+entry:
+  %result = call <1 x float>
+           @llvm.experimental.constrained.uitofp.v1f32.v1i32(<1 x i32> %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret <1 x float> %result
+}
+
+define <2 x double> @constrained_vector_uitofp_v2f64_v2i32(<2 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_uitofp_v2f64_v2i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movd %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; CHECK-NEXT:    movd %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT:    subsd %xmm1, %xmm0
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT:    subsd %xmm1, %xmm2
+; CHECK-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm2[0]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_uitofp_v2f64_v2i32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vextractps $1, %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    vmovss %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
+; AVX-NEXT:    vsubsd %xmm1, %xmm2, %xmm1
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
+; AVX-NEXT:    retq
+entry:
+  %result = call <2 x double>
+           @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32> %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret <2 x double> %result
+}
+
+define <2 x float> @constrained_vector_uitofp_v2f32_v2i32(<2 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_uitofp_v2f32_v2i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movd %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; CHECK-NEXT:    movd %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT:    subsd %xmm1, %xmm0
+; CHECK-NEXT:    cvtsd2ss %xmm0, %xmm0
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT:    subsd %xmm1, %xmm2
+; CHECK-NEXT:    xorps %xmm1, %xmm1
+; CHECK-NEXT:    cvtsd2ss %xmm2, %xmm1
+; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_uitofp_v2f32_v2i32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vextractps $1, %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    vmovss %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
+; AVX-NEXT:    vsubsd %xmm1, %xmm2, %xmm1
+; AVX-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; AVX-NEXT:    retq
+entry:
+  %result = call <2 x float>
+           @llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32> %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret <2 x float> %result
+}
+
+define <3 x double> @constrained_vector_uitofp_v3f64_v3i32(<3 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_uitofp_v3f64_v3i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movd %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; CHECK-NEXT:    movd %xmm1, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT:    movd %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT:    subsd %xmm2, %xmm0
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT:    subsd %xmm2, %xmm1
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm3 = mem[0],zero
+; CHECK-NEXT:    subsd %xmm2, %xmm3
+; CHECK-NEXT:    movsd %xmm3, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldl -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_uitofp_v3f64_v3i32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vextractps $1, %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    vmovss %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    vextractps $2, %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
+; AVX-NEXT:    vsubsd %xmm1, %xmm2, %xmm2
+; AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm2[0],xmm0[0]
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
+; AVX-NEXT:    vsubsd %xmm1, %xmm2, %xmm1
+; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX-NEXT:    retq
+entry:
+  %result = call <3 x double>
+           @llvm.experimental.constrained.uitofp.v3f64.v3i32(<3 x i32> %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret <3 x double> %result
+}
+
+define <3 x float> @constrained_vector_uitofp_v3f32_v3i32(<3 x i32> %x) #0 {
+; CHECK-LABEL: constrained_vector_uitofp_v3f32_v3i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    movd %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
+; CHECK-NEXT:    movd %xmm1, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; CHECK-NEXT:    movd %xmm0, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT:    subsd %xmm1, %xmm0
+; CHECK-NEXT:    cvtsd2ss %xmm0, %xmm0
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT:    subsd %xmm1, %xmm2
+; CHECK-NEXT:    cvtsd2ss %xmm2, %xmm2
+; CHECK-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; CHECK-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; CHECK-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
+; CHECK-NEXT:    subsd %xmm1, %xmm2
+; CHECK-NEXT:    xorps %xmm1, %xmm1
+; CHECK-NEXT:    cvtsd2ss %xmm2, %xmm1
+; CHECK-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; CHECK-NEXT:    retq
+;
+; AVX-LABEL: constrained_vector_uitofp_v3f32_v3i32:
+; AVX:       # %bb.0: # %entry
+; AVX-NEXT:    vextractps $1, %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    vmovss %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    vextractps $2, %xmm0, -{{[0-9]+}}(%rsp)
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; AVX-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
+; AVX-NEXT:    vsubsd %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    vcvtsd2ss %xmm0, %xmm0, %xmm0
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
+; AVX-NEXT:    vsubsd %xmm1, %xmm2, %xmm2
+; AVX-NEXT:    vcvtsd2ss %xmm2, %xmm2, %xmm2
+; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm2[0],xmm0[0],xmm2[2,3]
+; AVX-NEXT:    movl $1127219200, -{{[0-9]+}}(%rsp) # imm = 0x43300000
+; AVX-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
+; AVX-NEXT:    vsubsd %xmm1, %xmm2, %xmm1
+; AVX-NEXT:    vcvtsd2ss %xmm1, %xmm1, %xmm1
+; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0],xmm0[3]
+; AVX-NEXT:    retq
+entry:
+  %result = call <3 x float>
+           @llvm.experimental.constrained.uitofp.v3f32.v3i32(<3 x i32> %x,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret <3 x float> %result
+}
+
 attributes #0 = { strictfp }
 
 ; Single width declarations
@@ -5526,6 +6034,10 @@
 declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.sitofp.v2f64.v2i32(<2 x i32>, metadata, metadata)
+declare <2 x float> @llvm.experimental.constrained.sitofp.v2f32.v2i32(<2 x i32>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.uitofp.v2f64.v2i32(<2 x i32>, metadata, metadata)
+declare <2 x float> @llvm.experimental.constrained.uitofp.v2f32.v2i32(<2 x i32>, metadata, metadata)
 
 ; Scalar width declarations
 declare <1 x float> @llvm.experimental.constrained.fadd.v1f32(<1 x float>, <1 x float>, metadata, metadata)
@@ -5561,6 +6073,10 @@
 declare <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float>, metadata, metadata)
 declare <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float>, metadata, metadata)
 declare <1 x float> @llvm.experimental.constrained.trunc.v1f32(<1 x float>, metadata, metadata)
+declare <1 x double> @llvm.experimental.constrained.sitofp.v1f64.v1i32(<1 x i32>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.sitofp.v1f32.v1i32(<1 x i32>, metadata, metadata)
+declare <1 x double> @llvm.experimental.constrained.uitofp.v1f64.v1i32(<1 x i32>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.uitofp.v1f32.v1i32(<1 x i32>, metadata, metadata)
 
 ; Illegal width declarations
 declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata)
@@ -5619,6 +6135,10 @@
 declare <3 x double> @llvm.experimental.constrained.round.v3f64(<3 x double>, metadata, metadata)
 declare <3 x float> @llvm.experimental.constrained.trunc.v3f32(<3 x float>, metadata, metadata)
 declare <3 x double> @llvm.experimental.constrained.trunc.v3f64(<3 x double>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.sitofp.v3f64.v3i32(<3 x i32>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.sitofp.v3f32.v3i32(<3 x i32>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.uitofp.v3f64.v3i32(<3 x i32>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.uitofp.v3f32.v3i32(<3 x i32>, metadata, metadata)
 
 ; Double width declarations
 declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata)
diff --git a/llvm/test/Feature/fp-intrinsics.ll b/llvm/test/Feature/fp-intrinsics.ll
--- a/llvm/test/Feature/fp-intrinsics.ll
+++ b/llvm/test/Feature/fp-intrinsics.ll
@@ -373,6 +373,28 @@
   ret i64 %result
 }
 
+; Verify that sitofp(42) isn't simplified when the rounding mode is unknown.
+; CHECK-LABEL: @f30
+; CHECK: call double @llvm.experimental.constrained.sitofp
+define double @f30() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 42,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
+; Verify that uitofp(42) isn't simplified when the rounding mode is unknown.
+; CHECK-LABEL: @f31
+; CHECK: call double @llvm.experimental.constrained.uitofp
+define double @f31() #0 {
+entry:
+  %result = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 42,
+                                               metadata !"round.dynamic",
+                                               metadata !"fpexcept.strict") #0
+  ret double %result
+}
+
 attributes #0 = { strictfp }
 
 @llvm.fp.env = thread_local global i8 zeroinitializer, section "llvm.metadata"
@@ -405,3 +427,5 @@
 declare i32 @llvm.experimental.constrained.lround.i32.f32(float, metadata)
 declare i64 @llvm.experimental.constrained.llround.i64.f64(double, metadata)
 declare i64 @llvm.experimental.constrained.llround.i64.f32(float, metadata)
+declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata)
+declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
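
Note: as a companion to the patch, here is a minimal IR-level sketch of how the
two new intrinsics compose with the existing constrained arithmetic, mirroring
the scalar tests above. The function name @convert_both and the i32/f64 type
choices are illustrative only; @llvm.experimental.constrained.fadd already
exists upstream, and the strictfp attribute is required on both the function
and the call sites:

  ; Both conversions honor the dynamic rounding mode and strict exception
  ; behavior, so neither call may be constant-folded or reordered past
  ; other strict-FP operations.
  define double @convert_both(i32 %s, i32 %u) #0 {
  entry:
    %sfp = call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %s,
                                                 metadata !"round.dynamic",
                                                 metadata !"fpexcept.strict") #0
    %ufp = call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %u,
                                                 metadata !"round.dynamic",
                                                 metadata !"fpexcept.strict") #0
    %sum = call double @llvm.experimental.constrained.fadd.f64(double %sfp,
                                                 double %ufp,
                                                 metadata !"round.dynamic",
                                                 metadata !"fpexcept.strict") #0
    ret double %sum
  }

  declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata)
  declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
  declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)

  attributes #0 = { strictfp }

During ISel these calls become STRICT_SINT_TO_FP/STRICT_UINT_TO_FP nodes,
which either map to a target operation or are expanded along the chains wired
up in ExpandLegalINT_TO_FP and expandUINT_TO_FP above.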