Index: llvm/trunk/docs/LangRef.rst
===================================================================
--- llvm/trunk/docs/LangRef.rst
+++ llvm/trunk/docs/LangRef.rst
@@ -14580,6 +14580,151 @@
 mode argument is only intended as information to the compiler.
 
+'``llvm.experimental.constrained.ceil``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <type>
+      @llvm.experimental.constrained.ceil(<type> <op1>,
+                                          metadata <rounding mode>,
+                                          metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.ceil``' intrinsic returns the ceiling of the
+first operand.
+
+Arguments:
+""""""""""
+
+The first argument and the return value are floating-point numbers of the same
+type.
+
+The second and third arguments specify the rounding mode and exception
+behavior as described above. The rounding mode is currently unused for this
+intrinsic.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``ceil`` functions
+would and handles error conditions in the same way.
+
+
+'``llvm.experimental.constrained.floor``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <type>
+      @llvm.experimental.constrained.floor(<type> <op1>,
+                                           metadata <rounding mode>,
+                                           metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.floor``' intrinsic returns the floor of the
+first operand.
+
+Arguments:
+""""""""""
+
+The first argument and the return value are floating-point numbers of the same
+type.
+
+The second and third arguments specify the rounding mode and exception
+behavior as described above. The rounding mode is currently unused for this
+intrinsic.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``floor`` functions
+would and handles error conditions in the same way.
+
+
+'``llvm.experimental.constrained.round``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <type>
+      @llvm.experimental.constrained.round(<type> <op1>,
+                                           metadata <rounding mode>,
+                                           metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.round``' intrinsic returns the first
+operand rounded to the nearest integer.
+
+Arguments:
+""""""""""
+
+The first argument and the return value are floating-point numbers of the same
+type.
+
+The second and third arguments specify the rounding mode and exception
+behavior as described above. The rounding mode is currently unused for this
+intrinsic.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``round`` functions
+would and handles error conditions in the same way.
+
+
+'``llvm.experimental.constrained.trunc``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <type>
+      @llvm.experimental.constrained.trunc(<type> <op1>,
+                                           metadata <rounding mode>,
+                                           metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.trunc``' intrinsic returns the first
+operand rounded to the nearest integer not larger in magnitude than the
+operand.
+
+Arguments:
+""""""""""
+
+The first argument and the return value are floating-point numbers of the same
+type.
+
+The second and third arguments specify the rounding mode and exception
+behavior as described above. The rounding mode is currently unused for this
+intrinsic.
+
+Semantics:
+""""""""""
+
+This function returns the same values as the libm ``trunc`` functions
+would and handles error conditions in the same way.
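[Editorial aside, not part of the patch: a minimal usage sketch of one of the
new intrinsics on a scalar ``double``, using the same metadata arguments the
patch's tests use; the function name ``@do_ceil`` is hypothetical. Because the
call is a constrained intrinsic, the optimizer may not assume the default
floating-point environment when transforming it. ::

      define double @do_ceil(double %x) {
      entry:
        %r = call double @llvm.experimental.constrained.ceil.f64(
                              double %x,
                              metadata !"round.dynamic",
                              metadata !"fpexcept.strict")
        ret double %r
      }

      declare double @llvm.experimental.constrained.ceil.f64(double, metadata, metadata)

For ``ceil``, ``floor``, ``round``, and ``trunc`` the rounding-mode argument is
informational only, as the text above notes.]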
+
+
 General Intrinsics
 ------------------
Index: llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h
+++ llvm/trunk/include/llvm/CodeGen/ISDOpcodes.h
@@ -289,6 +289,7 @@
     STRICT_FSQRT, STRICT_FPOW, STRICT_FPOWI, STRICT_FSIN, STRICT_FCOS,
     STRICT_FEXP, STRICT_FEXP2, STRICT_FLOG, STRICT_FLOG10, STRICT_FLOG2,
     STRICT_FRINT, STRICT_FNEARBYINT, STRICT_FMAXNUM, STRICT_FMINNUM,
+    STRICT_FCEIL, STRICT_FFLOOR, STRICT_FROUND, STRICT_FTRUNC,
 
     /// FMA - Perform a * b + c with no intermediate rounding step.
     FMA,
Index: llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
+++ llvm/trunk/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -674,6 +674,10 @@
     case ISD::STRICT_FNEARBYINT:
     case ISD::STRICT_FMAXNUM:
     case ISD::STRICT_FMINNUM:
+    case ISD::STRICT_FCEIL:
+    case ISD::STRICT_FFLOOR:
+    case ISD::STRICT_FROUND:
+    case ISD::STRICT_FTRUNC:
       return true;
     }
   }
Index: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h
@@ -821,6 +821,10 @@
     case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break;
     case ISD::STRICT_FMAXNUM: EqOpc = ISD::FMAXNUM; break;
     case ISD::STRICT_FMINNUM: EqOpc = ISD::FMINNUM; break;
+    case ISD::STRICT_FCEIL: EqOpc = ISD::FCEIL; break;
+    case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break;
+    case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break;
+    case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break;
     }
 
     auto Action = getOperationAction(EqOpc, VT);
Index: llvm/trunk/include/llvm/IR/IntrinsicInst.h
===================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicInst.h
+++ llvm/trunk/include/llvm/IR/IntrinsicInst.h
@@ -253,6 +253,10 @@
       case Intrinsic::experimental_constrained_nearbyint:
       case Intrinsic::experimental_constrained_maxnum:
       case Intrinsic::experimental_constrained_minnum:
+      case Intrinsic::experimental_constrained_ceil:
+      case Intrinsic::experimental_constrained_floor:
+      case Intrinsic::experimental_constrained_round:
+      case Intrinsic::experimental_constrained_trunc:
         return true;
       default: return false;
       }
Index: llvm/trunk/include/llvm/IR/Intrinsics.td
===================================================================
--- llvm/trunk/include/llvm/IR/Intrinsics.td
+++ llvm/trunk/include/llvm/IR/Intrinsics.td
@@ -576,9 +576,25 @@
                                                       LLVMMatchType<0>,
                                                       llvm_metadata_ty,
                                                       llvm_metadata_ty ]>;
+  def int_experimental_constrained_ceil : Intrinsic<[ llvm_anyfloat_ty ],
+                                                    [ LLVMMatchType<0>,
+                                                      llvm_metadata_ty,
+                                                      llvm_metadata_ty ]>;
+  def int_experimental_constrained_floor : Intrinsic<[ llvm_anyfloat_ty ],
+                                                     [ LLVMMatchType<0>,
+                                                       llvm_metadata_ty,
+                                                       llvm_metadata_ty ]>;
+  def int_experimental_constrained_round : Intrinsic<[ llvm_anyfloat_ty ],
+                                                     [ LLVMMatchType<0>,
+                                                       llvm_metadata_ty,
+                                                       llvm_metadata_ty ]>;
+  def int_experimental_constrained_trunc : Intrinsic<[ llvm_anyfloat_ty ],
+                                                     [ LLVMMatchType<0>,
+                                                       llvm_metadata_ty,
+                                                       llvm_metadata_ty ]>;
 }
 // FIXME: Add intrinsics for fcmp, fptrunc, fpext, fptoui and fptosi.
-// FIXME: Add intrinsics for fabs, copysign, floor, ceil, trunc and round?
+// FIXME: Add intrinsics for fabs and copysign?
 
 //===------------------------- Expect Intrinsics --------------------------===//
Index: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1110,6 +1110,10 @@
   case ISD::STRICT_FNEARBYINT:
   case ISD::STRICT_FMAXNUM:
   case ISD::STRICT_FMINNUM:
+  case ISD::STRICT_FCEIL:
+  case ISD::STRICT_FFLOOR:
+  case ISD::STRICT_FROUND:
+  case ISD::STRICT_FTRUNC:
     // These pseudo-ops get legalized as if they were their non-strict
     // equivalent.  For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
     // is also legal, but if ISD::FSQRT requires expansion then so does
@@ -3940,16 +3944,19 @@
                                       RTLIB::EXP2_PPCF128));
     break;
   case ISD::FTRUNC:
+  case ISD::STRICT_FTRUNC:
     Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64,
                                       RTLIB::TRUNC_F80, RTLIB::TRUNC_F128,
                                       RTLIB::TRUNC_PPCF128));
     break;
   case ISD::FFLOOR:
+  case ISD::STRICT_FFLOOR:
     Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64,
                                       RTLIB::FLOOR_F80, RTLIB::FLOOR_F128,
                                       RTLIB::FLOOR_PPCF128));
     break;
   case ISD::FCEIL:
+  case ISD::STRICT_FCEIL:
     Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64,
                                       RTLIB::CEIL_F80, RTLIB::CEIL_F128,
                                       RTLIB::CEIL_PPCF128));
@@ -3969,6 +3976,7 @@
                                       RTLIB::NEARBYINT_PPCF128));
     break;
   case ISD::FROUND:
+  case ISD::STRICT_FROUND:
     Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32,
                                       RTLIB::ROUND_F64,
                                       RTLIB::ROUND_F80,
Index: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -308,6 +308,10 @@
   case ISD::STRICT_FNEARBYINT:
   case ISD::STRICT_FMAXNUM:
   case ISD::STRICT_FMINNUM:
+  case ISD::STRICT_FCEIL:
+  case ISD::STRICT_FFLOOR:
+  case ISD::STRICT_FROUND:
+  case ISD::STRICT_FTRUNC:
    // These pseudo-ops get legalized as if they were their non-strict
    // equivalent.  For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
    // is also legal, but if ISD::FSQRT requires expansion then so does
@@ -758,6 +762,10 @@
   case ISD::STRICT_FNEARBYINT:
   case ISD::STRICT_FMAXNUM:
   case ISD::STRICT_FMINNUM:
+  case ISD::STRICT_FCEIL:
+  case ISD::STRICT_FFLOOR:
+  case ISD::STRICT_FROUND:
+  case ISD::STRICT_FTRUNC:
     return ExpandStrictFPOp(Op);
   default:
     return DAG.UnrollVectorOp(Op.getNode());
Index: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -166,6 +166,10 @@
   case ISD::STRICT_FNEARBYINT:
   case ISD::STRICT_FMAXNUM:
   case ISD::STRICT_FMINNUM:
+  case ISD::STRICT_FCEIL:
+  case ISD::STRICT_FFLOOR:
+  case ISD::STRICT_FROUND:
+  case ISD::STRICT_FTRUNC:
     R = ScalarizeVecRes_StrictFPOp(N);
     break;
   }
@@ -838,6 +842,10 @@
   case ISD::STRICT_FNEARBYINT:
   case ISD::STRICT_FMAXNUM:
   case ISD::STRICT_FMINNUM:
+  case ISD::STRICT_FCEIL:
+  case ISD::STRICT_FFLOOR:
+  case ISD::STRICT_FROUND:
+  case ISD::STRICT_FTRUNC:
     SplitVecRes_StrictFPOp(N, Lo, Hi);
     break;
   }
@@ -2406,6 +2414,10 @@
   case ISD::STRICT_FNEARBYINT:
   case ISD::STRICT_FMAXNUM:
   case ISD::STRICT_FMINNUM:
+  case ISD::STRICT_FCEIL:
+  case ISD::STRICT_FFLOOR:
+  case ISD::STRICT_FROUND:
+  case ISD::STRICT_FTRUNC:
     Res = WidenVecRes_StrictFP(N);
     break;
Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7384,6 +7384,10 @@
     break;
   case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break;
   case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break;
+  case ISD::STRICT_FCEIL:  NewOpc = ISD::FCEIL;  IsUnary = true; break;
+  case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; IsUnary = true; break;
+  case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; IsUnary = true; break;
+  case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; IsUnary = true; break;
   }
 
   // We're taking this node out of the chain, so we need to re-link things.
Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5633,6 +5633,10 @@
   case Intrinsic::experimental_constrained_nearbyint:
   case Intrinsic::experimental_constrained_maxnum:
   case Intrinsic::experimental_constrained_minnum:
+  case Intrinsic::experimental_constrained_ceil:
+  case Intrinsic::experimental_constrained_floor:
+  case Intrinsic::experimental_constrained_round:
+  case Intrinsic::experimental_constrained_trunc:
    visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
    return nullptr;
  case Intrinsic::fmuladd: {
@@ -6386,6 +6390,18 @@
  case Intrinsic::experimental_constrained_minnum:
    Opcode = ISD::STRICT_FMINNUM;
    break;
+  case Intrinsic::experimental_constrained_ceil:
+    Opcode = ISD::STRICT_FCEIL;
+    break;
+  case Intrinsic::experimental_constrained_floor:
+    Opcode = ISD::STRICT_FFLOOR;
+    break;
+  case Intrinsic::experimental_constrained_round:
+    Opcode = ISD::STRICT_FROUND;
+    break;
+  case Intrinsic::experimental_constrained_trunc:
+    Opcode = ISD::STRICT_FTRUNC;
+    break;
  }
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Chain = getRoot();
Index: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -193,13 +193,17 @@
   case ISD::STRICT_FCOS:                return "strict_fcos";
   case ISD::FSINCOS:                    return "fsincos";
   case ISD::FTRUNC:                     return "ftrunc";
+  case ISD::STRICT_FTRUNC:              return "strict_ftrunc";
   case ISD::FFLOOR:                     return "ffloor";
+  case ISD::STRICT_FFLOOR:              return "strict_ffloor";
   case ISD::FCEIL:                      return "fceil";
+  case ISD::STRICT_FCEIL:               return "strict_fceil";
   case ISD::FRINT:                      return "frint";
   case ISD::STRICT_FRINT:               return "strict_frint";
   case ISD::FNEARBYINT:                 return "fnearbyint";
   case ISD::STRICT_FNEARBYINT:          return "strict_fnearbyint";
   case ISD::FROUND:                     return "fround";
+  case ISD::STRICT_FROUND:              return "strict_fround";
   case ISD::FEXP:                       return "fexp";
   case ISD::STRICT_FEXP:                return "strict_fexp";
   case ISD::FEXP2:                      return "fexp2";
Index: llvm/trunk/lib/IR/IntrinsicInst.cpp
===================================================================
--- llvm/trunk/lib/IR/IntrinsicInst.cpp
+++ llvm/trunk/lib/IR/IntrinsicInst.cpp
@@ -152,6 +152,10 @@
   case Intrinsic::experimental_constrained_log2:
   case Intrinsic::experimental_constrained_rint:
   case Intrinsic::experimental_constrained_nearbyint:
+  case Intrinsic::experimental_constrained_ceil:
+  case Intrinsic::experimental_constrained_floor:
+  case Intrinsic::experimental_constrained_round:
+  case Intrinsic::experimental_constrained_trunc:
     return true;
   }
 }
Index: llvm/trunk/lib/IR/Verifier.cpp
===================================================================
--- llvm/trunk/lib/IR/Verifier.cpp
+++ llvm/trunk/lib/IR/Verifier.cpp
@@ -4106,6 +4106,10 @@
   case Intrinsic::experimental_constrained_nearbyint:
   case Intrinsic::experimental_constrained_maxnum:
   case Intrinsic::experimental_constrained_minnum:
+  case Intrinsic::experimental_constrained_ceil:
+  case Intrinsic::experimental_constrained_floor:
+  case Intrinsic::experimental_constrained_round:
+  case Intrinsic::experimental_constrained_trunc:
     visitConstrainedFPIntrinsic(
         cast<ConstrainedFPIntrinsic>(*CS.getInstruction()));
     break;
Index: llvm/trunk/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ llvm/trunk/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -2423,6 +2423,409 @@
   ret <4 x double> %min
 }
 
+define <1 x float> @constrained_vector_ceil_v1f32() {
+; CHECK-LABEL: constrained_vector_ceil_v1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    callq ceilf
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32(
+                               <1 x float> ,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %ceil
+}
+
+define <2 x double> @constrained_vector_ceil_v2f64() {
+; CHECK-LABEL: constrained_vector_ceil_v2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq ceil
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq ceil
+; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
+                                <2 x double> ,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %ceil
+}
+
+define <3 x float> @constrained_vector_ceil_v3f32() {
+; CHECK-LABEL: constrained_vector_ceil_v3f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    callq ceilf
+; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    callq ceilf
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    callq ceilf
+; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32(
+                               <3 x float> ,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <3 x float> %ceil
+}
+
+define <3 x double> @constrained_vector_ceil_v3f64() {
+; CHECK-LABEL: constrained_vector_ceil_v3f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq ceil
+; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq ceil
+; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq ceil
+; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    # xmm0 = mem[0],zero
+; CHECK-NEXT:    movsd (%rsp), %xmm1 # 8-byte Reload
+; CHECK-NEXT:    # xmm1 = mem[0],zero
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %ceil = call <3 x double> @llvm.experimental.constrained.ceil.v3f64(
+                                <3 x double> ,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <3 x double> %ceil
+}
+
+define <1 x float> @constrained_vector_floor_v1f32() {
+; CHECK-LABEL: constrained_vector_floor_v1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    callq floorf
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32(
+                                <1 x float> ,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <1 x float> %floor
+}
+
+
+define <2 x double> @constrained_vector_floor_v2f64() {
+; CHECK-LABEL: constrained_vector_floor_v2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq floor
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq floor
+; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
+                                 <2 x double> ,
+                                 metadata !"round.dynamic",
+                                 metadata !"fpexcept.strict")
+  ret <2 x double> %floor
+}
+
+define <3 x float> @constrained_vector_floor_v3f32() {
+; CHECK-LABEL: constrained_vector_floor_v3f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    callq floorf
+; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    callq floorf
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    callq floorf
+; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32(
+                                <3 x float> ,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <3 x float> %floor
+}
+
+define <3 x double> @constrained_vector_floor_v3f64() {
+; CHECK-LABEL: constrained_vector_floor_v3f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq floor
+; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq floor
+; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq floor
+; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    # xmm0 = mem[0],zero
+; CHECK-NEXT:    movsd (%rsp), %xmm1 # 8-byte Reload
+; CHECK-NEXT:    # xmm1 = mem[0],zero
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %floor = call <3 x double> @llvm.experimental.constrained.floor.v3f64(
+                                 <3 x double> ,
+                                 metadata !"round.dynamic",
+                                 metadata !"fpexcept.strict")
+  ret <3 x double> %floor
+}
+
+define <1 x float> @constrained_vector_round_v1f32() {
+; CHECK-LABEL: constrained_vector_round_v1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    callq roundf
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %round = call <1 x float> @llvm.experimental.constrained.round.v1f32(
+                                <1 x float> ,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <1 x float> %round
+}
+
+define <2 x double> @constrained_vector_round_v2f64() {
+; CHECK-LABEL: constrained_vector_round_v2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq round
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq round
+; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %round = call <2 x double> @llvm.experimental.constrained.round.v2f64(
+                                 <2 x double> ,
+                                 metadata !"round.dynamic",
+                                 metadata !"fpexcept.strict")
+  ret <2 x double> %round
+}
+
+define <3 x float> @constrained_vector_round_v3f32() {
+; CHECK-LABEL: constrained_vector_round_v3f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    callq roundf
+; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    callq roundf
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    callq roundf
+; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %round = call <3 x float> @llvm.experimental.constrained.round.v3f32(
+                                <3 x float> ,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <3 x float> %round
+}
+
+
+define <3 x double> @constrained_vector_round_v3f64() {
+; CHECK-LABEL: constrained_vector_round_v3f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq round
+; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq round
+; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq round
+; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    # xmm0 = mem[0],zero
+; CHECK-NEXT:    movsd (%rsp), %xmm1 # 8-byte Reload
+; CHECK-NEXT:    # xmm1 = mem[0],zero
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %round = call <3 x double> @llvm.experimental.constrained.round.v3f64(
+                                 <3 x double> ,
+                                 metadata !"round.dynamic",
+                                 metadata !"fpexcept.strict")
+  ret <3 x double> %round
+}
+
+define <1 x float> @constrained_vector_trunc_v1f32() {
+; CHECK-LABEL: constrained_vector_trunc_v1f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    callq truncf
+; CHECK-NEXT:    popq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32(
+                                <1 x float> ,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <1 x float> %trunc
+}
+
+define <2 x double> @constrained_vector_trunc_v2f64() {
+; CHECK-LABEL: constrained_vector_trunc_v2f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq trunc
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq trunc
+; CHECK-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
+                                 <2 x double> ,
+                                 metadata !"round.dynamic",
+                                 metadata !"fpexcept.strict")
+  ret <2 x double> %trunc
+}
+
+define <3 x float> @constrained_vector_trunc_v3f32() {
+; CHECK-LABEL: constrained_vector_trunc_v3f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 48
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    callq truncf
+; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    callq truncf
+; CHECK-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
+; CHECK-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; CHECK-NEXT:    callq truncf
+; CHECK-NEXT:    movaps (%rsp), %xmm1 # 16-byte Reload
+; CHECK-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; CHECK-NEXT:    # xmm1 = xmm1[0],mem[0]
+; CHECK-NEXT:    movaps %xmm1, %xmm0
+; CHECK-NEXT:    addq $40, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32(
+                                <3 x float> ,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <3 x float> %trunc
+}
+
+define <3 x double> @constrained_vector_trunc_v3f64() {
+; CHECK-LABEL: constrained_vector_trunc_v3f64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    subq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq trunc
+; CHECK-NEXT:    movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq trunc
+; CHECK-NEXT:    movsd %xmm0, (%rsp) # 8-byte Spill
+; CHECK-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    callq trunc
+; CHECK-NEXT:    movsd %xmm0, {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldl {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; CHECK-NEXT:    # xmm0 = mem[0],zero
+; CHECK-NEXT:    movsd (%rsp), %xmm1 # 8-byte Reload
+; CHECK-NEXT:    # xmm1 = mem[0],zero
+; CHECK-NEXT:    addq $24, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+entry:
+  %trunc = call <3 x double> @llvm.experimental.constrained.trunc.v3f64(
+                                 <3 x double> ,
+                                 metadata !"round.dynamic",
+                                 metadata !"fpexcept.strict")
+  ret <3 x double> %trunc
+}
+
+
 ; Single width declarations
 declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
@@ -2443,6 +2846,10 @@
 declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata, metadata)
 
 ; Scalar width declarations
 declare <1 x float> @llvm.experimental.constrained.fadd.v1f32(<1 x float>, <1 x float>, metadata, metadata)
@@ -2464,6 +2871,10 @@
 declare <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(<1 x float>, metadata, metadata)
 declare <1 x float> @llvm.experimental.constrained.maxnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
 declare <1 x float> @llvm.experimental.constrained.minnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.trunc.v1f32(<1 x float>, metadata, metadata)
 
 ; Illegal width declarations
 declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata)
@@ -2504,6 +2915,14 @@
 declare <3 x double> @llvm.experimental.constrained.maxnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
 declare <3 x float> @llvm.experimental.constrained.minnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
 declare <3 x double> @llvm.experimental.constrained.minnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.ceil.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.ceil.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.floor.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.floor.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.round.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.round.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.trunc.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.trunc.v3f64(<3 x double>, metadata, metadata)
 
 ; Double width declarations
 declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata)
@@ -2525,3 +2944,8 @@
 declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
 declare <4 x double> @llvm.experimental.constrained.maxnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
 declare <4 x double> @llvm.experimental.constrained.minnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, metadata, metadata)
+
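
[Editorial aside, not part of the patch: a hedged sketch of what one of the
vector calls in the tests above looks like with its constant operand spelled
out in full; the lane values 1.5 and 2.5 are illustrative placeholders, not
the values used by the actual test file.

  %ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
                                <2 x double> <double 1.5, double 2.5>,
                                metadata !"round.dynamic",
                                metadata !"fpexcept.strict")

On targets without a dedicated strict vector lowering, each lane expands to a
separate ``ceil`` libcall, which is exactly what the two ``callq ceil`` CHECK
lines in ``constrained_vector_ceil_v2f64`` verify.]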