diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -897,27 +897,50 @@ continue; } case ISD::FCEIL: + case ISD::STRICT_FCEIL: case ISD::FFLOOR: + case ISD::STRICT_FFLOOR: case ISD::FTRUNC: + case ISD::STRICT_FTRUNC: case ISD::FNEARBYINT: - case ISD::FRINT: { + case ISD::STRICT_FNEARBYINT: + case ISD::FRINT: + case ISD::STRICT_FRINT: { // Replace fp rounding with their X86 specific equivalent so we don't // need 2 sets of patterns. unsigned Imm; switch (N->getOpcode()) { default: llvm_unreachable("Unexpected opcode!"); + case ISD::STRICT_FCEIL: case ISD::FCEIL: Imm = 0xA; break; + case ISD::STRICT_FFLOOR: case ISD::FFLOOR: Imm = 0x9; break; + case ISD::STRICT_FTRUNC: case ISD::FTRUNC: Imm = 0xB; break; + case ISD::STRICT_FNEARBYINT: case ISD::FNEARBYINT: Imm = 0xC; break; + case ISD::STRICT_FRINT: case ISD::FRINT: Imm = 0x4; break; } SDLoc dl(N); - SDValue Res = CurDAG->getNode( - X86ISD::VRNDSCALE, dl, N->getValueType(0), N->getOperand(0), - CurDAG->getTargetConstant(Imm, dl, MVT::i8)); + bool IsStrict = N->isStrictFPOpcode(); + SDValue Res; + if (IsStrict) + Res = CurDAG->getNode(X86ISD::STRICT_VRNDSCALE, dl, + {N->getValueType(0), MVT::Other}, + {N->getOperand(0), N->getOperand(1), + CurDAG->getTargetConstant(Imm, dl, MVT::i8)}); + else + Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, N->getValueType(0), + N->getOperand(0), + CurDAG->getTargetConstant(Imm, dl, MVT::i8)); --I; - CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); + if (IsStrict) { + SDValue From[] = {SDValue(N, 0), SDValue(N, 1)}; + SDValue To[] = {Res.getValue(0), Res.getValue(1)}; + CurDAG->ReplaceAllUsesOfValuesWith(From, To, 2); + } else + CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); ++I; CurDAG->DeleteNode(N); continue; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- 
a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -424,7 +424,7 @@ // RndScale - Round FP Values To Include A Given Number Of Fraction Bits. // Also used by the legacy (V)ROUND intrinsics where we mask out the // scaling part of the immediate. - VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE, + VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE, STRICT_VRNDSCALE, // Tests Types Of a FP Values for packed types. VFPCLASS, // Tests Types Of a FP Values for scalar types. diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1068,11 +1068,16 @@ if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) { for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) { - setOperationAction(ISD::FFLOOR, RoundedTy, Legal); - setOperationAction(ISD::FCEIL, RoundedTy, Legal); - setOperationAction(ISD::FTRUNC, RoundedTy, Legal); - setOperationAction(ISD::FRINT, RoundedTy, Legal); - setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal); + setOperationAction(ISD::FFLOOR, RoundedTy, Legal); + setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal); + setOperationAction(ISD::FCEIL, RoundedTy, Legal); + setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal); + setOperationAction(ISD::FTRUNC, RoundedTy, Legal); + setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal); + setOperationAction(ISD::FRINT, RoundedTy, Legal); + setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal); + setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal); + setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal); } setOperationAction(ISD::SMAX, MVT::v16i8, Legal); @@ -1144,14 +1149,19 @@ : &X86::VR256RegClass); for (auto VT : { MVT::v8f32, MVT::v4f64 }) { - setOperationAction(ISD::FFLOOR, VT, Legal); - setOperationAction(ISD::FCEIL, VT, Legal); - setOperationAction(ISD::FTRUNC, VT, Legal); - 
setOperationAction(ISD::FRINT, VT, Legal); - setOperationAction(ISD::FNEARBYINT, VT, Legal); - setOperationAction(ISD::FNEG, VT, Custom); - setOperationAction(ISD::FABS, VT, Custom); - setOperationAction(ISD::FCOPYSIGN, VT, Custom); + setOperationAction(ISD::FFLOOR, VT, Legal); + setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); + setOperationAction(ISD::FCEIL, VT, Legal); + setOperationAction(ISD::STRICT_FCEIL, VT, Legal); + setOperationAction(ISD::FTRUNC, VT, Legal); + setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); + setOperationAction(ISD::FRINT, VT, Legal); + setOperationAction(ISD::STRICT_FRINT, VT, Legal); + setOperationAction(ISD::FNEARBYINT, VT, Legal); + setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); + setOperationAction(ISD::FNEG, VT, Custom); + setOperationAction(ISD::FABS, VT, Custom); + setOperationAction(ISD::FCOPYSIGN, VT, Custom); } // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted @@ -1503,11 +1513,16 @@ setOperationAction(ISD::SIGN_EXTEND, MVT::v8i8, Custom); for (auto VT : { MVT::v16f32, MVT::v8f64 }) { - setOperationAction(ISD::FFLOOR, VT, Legal); - setOperationAction(ISD::FCEIL, VT, Legal); - setOperationAction(ISD::FTRUNC, VT, Legal); - setOperationAction(ISD::FRINT, VT, Legal); - setOperationAction(ISD::FNEARBYINT, VT, Legal); + setOperationAction(ISD::FFLOOR, VT, Legal); + setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); + setOperationAction(ISD::FCEIL, VT, Legal); + setOperationAction(ISD::STRICT_FCEIL, VT, Legal); + setOperationAction(ISD::FTRUNC, VT, Legal); + setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); + setOperationAction(ISD::FRINT, VT, Legal); + setOperationAction(ISD::STRICT_FRINT, VT, Legal); + setOperationAction(ISD::FNEARBYINT, VT, Legal); + setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); setOperationAction(ISD::SELECT, VT, Custom); } @@ -29650,6 +29665,7 @@ case X86ISD::VPMADD52H: return "X86ISD::VPMADD52H"; case X86ISD::VPMADD52L: return "X86ISD::VPMADD52L"; case 
X86ISD::VRNDSCALE: return "X86ISD::VRNDSCALE"; + case X86ISD::STRICT_VRNDSCALE: return "X86ISD::STRICT_VRNDSCALE"; case X86ISD::VRNDSCALE_SAE: return "X86ISD::VRNDSCALE_SAE"; case X86ISD::VRNDSCALES: return "X86ISD::VRNDSCALES"; case X86ISD::VRNDSCALES_SAE: return "X86ISD::VRNDSCALES_SAE"; diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -9019,13 +9019,13 @@ } let Predicates = [HasAVX512] in { - def : Pat<(X86VRndScale _.FRC:$src1, timm:$src2), + def : Pat<(X86any_VRndScale _.FRC:$src1, timm:$src2), (_.EltVT (!cast<Instruction>(NAME##r) (_.EltVT (IMPLICIT_DEF)), _.FRC:$src1, timm:$src2))>; } let Predicates = [HasAVX512, OptForSize] in { - def : Pat<(X86VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2), + def : Pat<(X86any_VRndScale (_.ScalarLdFrag addr:$src1), timm:$src2), (_.EltVT (!cast<Instruction>(NAME##m) (_.EltVT (IMPLICIT_DEF)), addr:$src1, timm:$src2))>; } @@ -10290,7 +10290,7 @@ X86VReduce, X86VReduceSAE, SchedWriteFRnd, HasDQI>, AVX512AIi8Base, EVEX; defm VRNDSCALE : avx512_common_unary_fp_sae_packed_imm_all<"vrndscale", 0x08, 0x09, - X86VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>, + X86any_VRndScale, X86VRndScaleSAE, SchedWriteFRnd, HasAVX512>, AVX512AIi8Base, EVEX; defm VGETMANT : avx512_common_unary_fp_sae_packed_imm_all<"vgetmant", 0x26, 0x26, X86VGetMant, X86VGetMantSAE, SchedWriteFRnd, HasAVX512>, diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -466,6 +466,12 @@ def X86VReduce : SDNode<"X86ISD::VREDUCE", SDTFPUnaryOpImm>; def X86VReduceSAE : SDNode<"X86ISD::VREDUCE_SAE", SDTFPUnaryOpImm>; def X86VRndScale : SDNode<"X86ISD::VRNDSCALE", SDTFPUnaryOpImm>; +def X86strict_VRndScale : SDNode<"X86ISD::STRICT_VRNDSCALE", SDTFPUnaryOpImm, + [SDNPHasChain]>; +def X86any_VRndScale : 
PatFrags<(ops node:$src1, node:$src2), + [(X86strict_VRndScale node:$src1, node:$src2), + (X86VRndScale node:$src1, node:$src2)]>; + def X86VRndScaleSAE: SDNode<"X86ISD::VRNDSCALE_SAE", SDTFPUnaryOpImm>; def X86VGetMant : SDNode<"X86ISD::VGETMANT", SDTFPUnaryOpImm>; def X86VGetMantSAE : SDNode<"X86ISD::VGETMANT_SAE", SDTFPUnaryOpImm>; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -5540,19 +5540,19 @@ let ExeDomain = SSEPackedSingle, Uses = [MXCSR], mayRaiseFPException = 1 in { // Intrinsic form defm VROUNDPS : sse41_fp_unop_p<0x08, "vroundps", f128mem, VR128, v4f32, - loadv4f32, X86VRndScale, SchedWriteFRnd.XMM>, + loadv4f32, X86any_VRndScale, SchedWriteFRnd.XMM>, VEX, VEX_WIG; defm VROUNDPSY : sse41_fp_unop_p<0x08, "vroundps", f256mem, VR256, v8f32, - loadv8f32, X86VRndScale, SchedWriteFRnd.YMM>, + loadv8f32, X86any_VRndScale, SchedWriteFRnd.YMM>, VEX, VEX_L, VEX_WIG; } let ExeDomain = SSEPackedDouble, Uses = [MXCSR], mayRaiseFPException = 1 in { defm VROUNDPD : sse41_fp_unop_p<0x09, "vroundpd", f128mem, VR128, v2f64, - loadv2f64, X86VRndScale, SchedWriteFRnd.XMM>, + loadv2f64, X86any_VRndScale, SchedWriteFRnd.XMM>, VEX, VEX_WIG; defm VROUNDPDY : sse41_fp_unop_p<0x09, "vroundpd", f256mem, VR256, v4f64, - loadv4f64, X86VRndScale, SchedWriteFRnd.YMM>, + loadv4f64, X86any_VRndScale, SchedWriteFRnd.YMM>, VEX, VEX_L, VEX_WIG; } } @@ -5565,25 +5565,25 @@ } let Predicates = [UseAVX] in { - def : Pat<(X86VRndScale FR32:$src1, timm:$src2), + def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2), (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, timm:$src2)>; - def : Pat<(X86VRndScale FR64:$src1, timm:$src2), + def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2), (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, timm:$src2)>; } let Predicates = [UseAVX, OptForSize] in { - def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2), + def : Pat<(X86any_VRndScale 
(loadf32 addr:$src1), timm:$src2), (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>; - def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2), + def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2), (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>; } let ExeDomain = SSEPackedSingle in defm ROUNDPS : sse41_fp_unop_p<0x08, "roundps", f128mem, VR128, v4f32, - memopv4f32, X86VRndScale, SchedWriteFRnd.XMM>; + memopv4f32, X86any_VRndScale, SchedWriteFRnd.XMM>; let ExeDomain = SSEPackedDouble in defm ROUNDPD : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64, - memopv2f64, X86VRndScale, SchedWriteFRnd.XMM>; + memopv2f64, X86any_VRndScale, SchedWriteFRnd.XMM>; defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl>; @@ -5592,16 +5592,16 @@ v4f32, v2f64, X86RndScales>; let Predicates = [UseSSE41] in { - def : Pat<(X86VRndScale FR32:$src1, timm:$src2), + def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2), (ROUNDSSr FR32:$src1, timm:$src2)>; - def : Pat<(X86VRndScale FR64:$src1, timm:$src2), + def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2), (ROUNDSDr FR64:$src1, timm:$src2)>; } let Predicates = [UseSSE41, OptForSize] in { - def : Pat<(X86VRndScale (loadf32 addr:$src1), timm:$src2), + def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2), (ROUNDSSm addr:$src1, timm:$src2)>; - def : Pat<(X86VRndScale (loadf64 addr:$src1), timm:$src2), + def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2), (ROUNDSDm addr:$src1, timm:$src2)>; } diff --git a/llvm/test/CodeGen/X86/fp-strict-scalar-round.ll b/llvm/test/CodeGen/X86/fp-strict-scalar-round.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/fp-strict-scalar-round.ll @@ -0,0 +1,474 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE41,SSE41-X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown 
-mattr=+sse4.1 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE41,SSE41-X64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X64 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X86,AVX512-X86 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX-X64,AVX512-X64 + +declare float @llvm.experimental.constrained.ceil.f32(float, metadata) +declare double @llvm.experimental.constrained.ceil.f64(double, metadata) +declare float @llvm.experimental.constrained.floor.f32(float, metadata) +declare double @llvm.experimental.constrained.floor.f64(double, metadata) +declare float @llvm.experimental.constrained.trunc.f32(float, metadata) +declare double @llvm.experimental.constrained.trunc.f64(double, metadata) +declare float @llvm.experimental.constrained.rint.f32(float, metadata, metadata) +declare double @llvm.experimental.constrained.rint.f64(double, metadata, metadata) +declare float @llvm.experimental.constrained.nearbyint.f32(float, metadata, metadata) +declare double @llvm.experimental.constrained.nearbyint.f64(double, metadata, metadata) + +define float @fceil32(float %f) #0 { +; SSE41-X86-LABEL: fceil32: +; SSE41-X86: # %bb.0: +; SSE41-X86-NEXT: pushl %eax +; SSE41-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE41-X86-NEXT: roundss $10, %xmm0, %xmm0 +; SSE41-X86-NEXT: movss %xmm0, (%esp) +; SSE41-X86-NEXT: flds (%esp) +; SSE41-X86-NEXT: popl %eax +; SSE41-X86-NEXT: .cfi_def_cfa_offset 4 +; SSE41-X86-NEXT: retl +; +; SSE41-X64-LABEL: fceil32: +; SSE41-X64: # %bb.0: +; SSE41-X64-NEXT: roundss $10, %xmm0, %xmm0 +; SSE41-X64-NEXT: 
retq +; +; AVX-X86-LABEL: fceil32: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-X86-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0 +; AVX-X86-NEXT: vmovss %xmm0, (%esp) +; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: popl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: fceil32: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: vroundss $10, %xmm0, %xmm0, %xmm0 +; AVX-X64-NEXT: retq + %res = call float @llvm.experimental.constrained.ceil.f32( + float %f, metadata !"fpexcept.strict") + ret float %res +} + +define double @fceilf64(double %f) #0 { +; SSE41-X86-LABEL: fceilf64: +; SSE41-X86: # %bb.0: +; SSE41-X86-NEXT: pushl %ebp +; SSE41-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE41-X86-NEXT: .cfi_offset %ebp, -8 +; SSE41-X86-NEXT: movl %esp, %ebp +; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp +; SSE41-X86-NEXT: andl $-8, %esp +; SSE41-X86-NEXT: subl $8, %esp +; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE41-X86-NEXT: roundsd $10, %xmm0, %xmm0 +; SSE41-X86-NEXT: movsd %xmm0, (%esp) +; SSE41-X86-NEXT: fldl (%esp) +; SSE41-X86-NEXT: movl %ebp, %esp +; SSE41-X86-NEXT: popl %ebp +; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE41-X86-NEXT: retl +; +; SSE41-X64-LABEL: fceilf64: +; SSE41-X64: # %bb.0: +; SSE41-X64-NEXT: roundsd $10, %xmm0, %xmm0 +; SSE41-X64-NEXT: retq +; +; AVX-X86-LABEL: fceilf64: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %ebp +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: .cfi_offset %ebp, -8 +; AVX-X86-NEXT: movl %esp, %ebp +; AVX-X86-NEXT: .cfi_def_cfa_register %ebp +; AVX-X86-NEXT: andl $-8, %esp +; AVX-X86-NEXT: subl $8, %esp +; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-X86-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0 +; AVX-X86-NEXT: vmovsd %xmm0, (%esp) +; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: movl %ebp, %esp +; AVX-X86-NEXT: popl %ebp +; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: retl 
+; +; AVX-X64-LABEL: fceilf64: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: vroundsd $10, %xmm0, %xmm0, %xmm0 +; AVX-X64-NEXT: retq + %res = call double @llvm.experimental.constrained.ceil.f64( + double %f, metadata !"fpexcept.strict") + ret double %res +} + +define float @ffloor32(float %f) #0 { +; SSE41-X86-LABEL: ffloor32: +; SSE41-X86: # %bb.0: +; SSE41-X86-NEXT: pushl %eax +; SSE41-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE41-X86-NEXT: roundss $9, %xmm0, %xmm0 +; SSE41-X86-NEXT: movss %xmm0, (%esp) +; SSE41-X86-NEXT: flds (%esp) +; SSE41-X86-NEXT: popl %eax +; SSE41-X86-NEXT: .cfi_def_cfa_offset 4 +; SSE41-X86-NEXT: retl +; +; SSE41-X64-LABEL: ffloor32: +; SSE41-X64: # %bb.0: +; SSE41-X64-NEXT: roundss $9, %xmm0, %xmm0 +; SSE41-X64-NEXT: retq +; +; AVX-X86-LABEL: ffloor32: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-X86-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0 +; AVX-X86-NEXT: vmovss %xmm0, (%esp) +; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: popl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: ffloor32: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: vroundss $9, %xmm0, %xmm0, %xmm0 +; AVX-X64-NEXT: retq + %res = call float @llvm.experimental.constrained.floor.f32( + float %f, metadata !"fpexcept.strict") + ret float %res +} + +define double @ffloorf64(double %f) #0 { +; SSE41-X86-LABEL: ffloorf64: +; SSE41-X86: # %bb.0: +; SSE41-X86-NEXT: pushl %ebp +; SSE41-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE41-X86-NEXT: .cfi_offset %ebp, -8 +; SSE41-X86-NEXT: movl %esp, %ebp +; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp +; SSE41-X86-NEXT: andl $-8, %esp +; SSE41-X86-NEXT: subl $8, %esp +; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE41-X86-NEXT: roundsd $9, %xmm0, %xmm0 +; SSE41-X86-NEXT: movsd %xmm0, (%esp) +; SSE41-X86-NEXT: fldl (%esp) +; SSE41-X86-NEXT: movl %ebp, 
%esp +; SSE41-X86-NEXT: popl %ebp +; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE41-X86-NEXT: retl +; +; SSE41-X64-LABEL: ffloorf64: +; SSE41-X64: # %bb.0: +; SSE41-X64-NEXT: roundsd $9, %xmm0, %xmm0 +; SSE41-X64-NEXT: retq +; +; AVX-X86-LABEL: ffloorf64: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %ebp +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: .cfi_offset %ebp, -8 +; AVX-X86-NEXT: movl %esp, %ebp +; AVX-X86-NEXT: .cfi_def_cfa_register %ebp +; AVX-X86-NEXT: andl $-8, %esp +; AVX-X86-NEXT: subl $8, %esp +; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-X86-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0 +; AVX-X86-NEXT: vmovsd %xmm0, (%esp) +; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: movl %ebp, %esp +; AVX-X86-NEXT: popl %ebp +; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: ffloorf64: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: vroundsd $9, %xmm0, %xmm0, %xmm0 +; AVX-X64-NEXT: retq + %res = call double @llvm.experimental.constrained.floor.f64( + double %f, metadata !"fpexcept.strict") + ret double %res +} + +define float @ftrunc32(float %f) #0 { +; SSE41-X86-LABEL: ftrunc32: +; SSE41-X86: # %bb.0: +; SSE41-X86-NEXT: pushl %eax +; SSE41-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE41-X86-NEXT: roundss $11, %xmm0, %xmm0 +; SSE41-X86-NEXT: movss %xmm0, (%esp) +; SSE41-X86-NEXT: flds (%esp) +; SSE41-X86-NEXT: popl %eax +; SSE41-X86-NEXT: .cfi_def_cfa_offset 4 +; SSE41-X86-NEXT: retl +; +; SSE41-X64-LABEL: ftrunc32: +; SSE41-X64: # %bb.0: +; SSE41-X64-NEXT: roundss $11, %xmm0, %xmm0 +; SSE41-X64-NEXT: retq +; +; AVX-X86-LABEL: ftrunc32: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-X86-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0 +; AVX-X86-NEXT: vmovss %xmm0, (%esp) +; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: popl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 +; 
AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: ftrunc32: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0 +; AVX-X64-NEXT: retq + %res = call float @llvm.experimental.constrained.trunc.f32( + float %f, metadata !"fpexcept.strict") + ret float %res +} + +define double @ftruncf64(double %f) #0 { +; SSE41-X86-LABEL: ftruncf64: +; SSE41-X86: # %bb.0: +; SSE41-X86-NEXT: pushl %ebp +; SSE41-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE41-X86-NEXT: .cfi_offset %ebp, -8 +; SSE41-X86-NEXT: movl %esp, %ebp +; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp +; SSE41-X86-NEXT: andl $-8, %esp +; SSE41-X86-NEXT: subl $8, %esp +; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE41-X86-NEXT: roundsd $11, %xmm0, %xmm0 +; SSE41-X86-NEXT: movsd %xmm0, (%esp) +; SSE41-X86-NEXT: fldl (%esp) +; SSE41-X86-NEXT: movl %ebp, %esp +; SSE41-X86-NEXT: popl %ebp +; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE41-X86-NEXT: retl +; +; SSE41-X64-LABEL: ftruncf64: +; SSE41-X64: # %bb.0: +; SSE41-X64-NEXT: roundsd $11, %xmm0, %xmm0 +; SSE41-X64-NEXT: retq +; +; AVX-X86-LABEL: ftruncf64: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %ebp +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: .cfi_offset %ebp, -8 +; AVX-X86-NEXT: movl %esp, %ebp +; AVX-X86-NEXT: .cfi_def_cfa_register %ebp +; AVX-X86-NEXT: andl $-8, %esp +; AVX-X86-NEXT: subl $8, %esp +; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-X86-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0 +; AVX-X86-NEXT: vmovsd %xmm0, (%esp) +; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: movl %ebp, %esp +; AVX-X86-NEXT: popl %ebp +; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: ftruncf64: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0 +; AVX-X64-NEXT: retq + %res = call double @llvm.experimental.constrained.trunc.f64( + double %f, metadata !"fpexcept.strict") + ret double %res +} + +define float @frint32(float %f) #0 { +; SSE41-X86-LABEL: frint32: +; SSE41-X86: # %bb.0: +; SSE41-X86-NEXT: 
pushl %eax +; SSE41-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE41-X86-NEXT: roundss $4, %xmm0, %xmm0 +; SSE41-X86-NEXT: movss %xmm0, (%esp) +; SSE41-X86-NEXT: flds (%esp) +; SSE41-X86-NEXT: popl %eax +; SSE41-X86-NEXT: .cfi_def_cfa_offset 4 +; SSE41-X86-NEXT: retl +; +; SSE41-X64-LABEL: frint32: +; SSE41-X64: # %bb.0: +; SSE41-X64-NEXT: roundss $4, %xmm0, %xmm0 +; SSE41-X64-NEXT: retq +; +; AVX-X86-LABEL: frint32: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-X86-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0 +; AVX-X86-NEXT: vmovss %xmm0, (%esp) +; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: popl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: frint32: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: vroundss $4, %xmm0, %xmm0, %xmm0 +; AVX-X64-NEXT: retq + %res = call float @llvm.experimental.constrained.rint.f32( + float %f, + metadata !"round.dynamic", metadata !"fpexcept.strict") + ret float %res +} + +define double @frintf64(double %f) #0 { +; SSE41-X86-LABEL: frintf64: +; SSE41-X86: # %bb.0: +; SSE41-X86-NEXT: pushl %ebp +; SSE41-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE41-X86-NEXT: .cfi_offset %ebp, -8 +; SSE41-X86-NEXT: movl %esp, %ebp +; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp +; SSE41-X86-NEXT: andl $-8, %esp +; SSE41-X86-NEXT: subl $8, %esp +; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE41-X86-NEXT: roundsd $4, %xmm0, %xmm0 +; SSE41-X86-NEXT: movsd %xmm0, (%esp) +; SSE41-X86-NEXT: fldl (%esp) +; SSE41-X86-NEXT: movl %ebp, %esp +; SSE41-X86-NEXT: popl %ebp +; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE41-X86-NEXT: retl +; +; SSE41-X64-LABEL: frintf64: +; SSE41-X64: # %bb.0: +; SSE41-X64-NEXT: roundsd $4, %xmm0, %xmm0 +; SSE41-X64-NEXT: retq +; +; AVX-X86-LABEL: frintf64: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %ebp +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 
+; AVX-X86-NEXT: .cfi_offset %ebp, -8 +; AVX-X86-NEXT: movl %esp, %ebp +; AVX-X86-NEXT: .cfi_def_cfa_register %ebp +; AVX-X86-NEXT: andl $-8, %esp +; AVX-X86-NEXT: subl $8, %esp +; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-X86-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0 +; AVX-X86-NEXT: vmovsd %xmm0, (%esp) +; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: movl %ebp, %esp +; AVX-X86-NEXT: popl %ebp +; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: frintf64: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: vroundsd $4, %xmm0, %xmm0, %xmm0 +; AVX-X64-NEXT: retq + %res = call double @llvm.experimental.constrained.rint.f64( + double %f, + metadata !"round.dynamic", metadata !"fpexcept.strict") + ret double %res +} + +define float @fnearbyint32(float %f) #0 { +; SSE41-X86-LABEL: fnearbyint32: +; SSE41-X86: # %bb.0: +; SSE41-X86-NEXT: pushl %eax +; SSE41-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE41-X86-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; SSE41-X86-NEXT: roundss $12, %xmm0, %xmm0 +; SSE41-X86-NEXT: movss %xmm0, (%esp) +; SSE41-X86-NEXT: flds (%esp) +; SSE41-X86-NEXT: popl %eax +; SSE41-X86-NEXT: .cfi_def_cfa_offset 4 +; SSE41-X86-NEXT: retl +; +; SSE41-X64-LABEL: fnearbyint32: +; SSE41-X64: # %bb.0: +; SSE41-X64-NEXT: roundss $12, %xmm0, %xmm0 +; SSE41-X64-NEXT: retq +; +; AVX-X86-LABEL: fnearbyint32: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; AVX-X86-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0 +; AVX-X86-NEXT: vmovss %xmm0, (%esp) +; AVX-X86-NEXT: flds (%esp) +; AVX-X86-NEXT: popl %eax +; AVX-X86-NEXT: .cfi_def_cfa_offset 4 +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: fnearbyint32: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: vroundss $12, %xmm0, %xmm0, %xmm0 +; AVX-X64-NEXT: retq + %res = call float @llvm.experimental.constrained.nearbyint.f32( + float %f, + metadata !"round.dynamic", metadata !"fpexcept.strict") + ret float %res +} + 
+define double @fnearbyintf64(double %f) #0 { +; SSE41-X86-LABEL: fnearbyintf64: +; SSE41-X86: # %bb.0: +; SSE41-X86-NEXT: pushl %ebp +; SSE41-X86-NEXT: .cfi_def_cfa_offset 8 +; SSE41-X86-NEXT: .cfi_offset %ebp, -8 +; SSE41-X86-NEXT: movl %esp, %ebp +; SSE41-X86-NEXT: .cfi_def_cfa_register %ebp +; SSE41-X86-NEXT: andl $-8, %esp +; SSE41-X86-NEXT: subl $8, %esp +; SSE41-X86-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; SSE41-X86-NEXT: roundsd $12, %xmm0, %xmm0 +; SSE41-X86-NEXT: movsd %xmm0, (%esp) +; SSE41-X86-NEXT: fldl (%esp) +; SSE41-X86-NEXT: movl %ebp, %esp +; SSE41-X86-NEXT: popl %ebp +; SSE41-X86-NEXT: .cfi_def_cfa %esp, 4 +; SSE41-X86-NEXT: retl +; +; SSE41-X64-LABEL: fnearbyintf64: +; SSE41-X64: # %bb.0: +; SSE41-X64-NEXT: roundsd $12, %xmm0, %xmm0 +; SSE41-X64-NEXT: retq +; +; AVX-X86-LABEL: fnearbyintf64: +; AVX-X86: # %bb.0: +; AVX-X86-NEXT: pushl %ebp +; AVX-X86-NEXT: .cfi_def_cfa_offset 8 +; AVX-X86-NEXT: .cfi_offset %ebp, -8 +; AVX-X86-NEXT: movl %esp, %ebp +; AVX-X86-NEXT: .cfi_def_cfa_register %ebp +; AVX-X86-NEXT: andl $-8, %esp +; AVX-X86-NEXT: subl $8, %esp +; AVX-X86-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero +; AVX-X86-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0 +; AVX-X86-NEXT: vmovsd %xmm0, (%esp) +; AVX-X86-NEXT: fldl (%esp) +; AVX-X86-NEXT: movl %ebp, %esp +; AVX-X86-NEXT: popl %ebp +; AVX-X86-NEXT: .cfi_def_cfa %esp, 4 +; AVX-X86-NEXT: retl +; +; AVX-X64-LABEL: fnearbyintf64: +; AVX-X64: # %bb.0: +; AVX-X64-NEXT: vroundsd $12, %xmm0, %xmm0, %xmm0 +; AVX-X64-NEXT: retq + %res = call double @llvm.experimental.constrained.nearbyint.f64( + double %f, + metadata !"round.dynamic", metadata !"fpexcept.strict") + ret double %res +} + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/X86/vec-strict-256.ll b/llvm/test/CodeGen/X86/vec-strict-256.ll --- a/llvm/test/CodeGen/X86/vec-strict-256.ll +++ b/llvm/test/CodeGen/X86/vec-strict-256.ll @@ -18,6 +18,16 @@ declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, 
metadata, metadata) declare <4 x double> @llvm.experimental.constrained.fma.v4f64(<4 x double>, <4 x double>, <4 x double>, metadata, metadata) declare <8 x float> @llvm.experimental.constrained.fma.v8f32(<8 x float>, <8 x float>, <8 x float>, metadata, metadata) +declare <8 x float> @llvm.experimental.constrained.ceil.v8f32(<8 x float>, metadata) +declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata) +declare <8 x float> @llvm.experimental.constrained.floor.v8f32(<8 x float>, metadata) +declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata) +declare <8 x float> @llvm.experimental.constrained.trunc.v8f32(<8 x float>, metadata) +declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, metadata) +declare <8 x float> @llvm.experimental.constrained.rint.v8f32(<8 x float>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, metadata, metadata) +declare <8 x float> @llvm.experimental.constrained.nearbyint.v8f32(<8 x float>, metadata, metadata) +declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata) define <4 x double> @f1(<4 x double> %a, <4 x double> %b) #0 { ; CHECK-LABEL: f1: @@ -178,4 +188,111 @@ ret <4 x double> %res } +define <8 x float> @fceilv8f32(<8 x float> %f) #0 { +; CHECK-LABEL: fceilv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vroundps $10, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %res = call <8 x float> @llvm.experimental.constrained.ceil.v8f32( + <8 x float> %f, metadata !"fpexcept.strict") + ret <8 x float> %res +} + +define <4 x double> @fceilv4f64(<4 x double> %f) #0 { +; CHECK-LABEL: fceilv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vroundpd $10, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %res = call <4 x double> @llvm.experimental.constrained.ceil.v4f64( + <4 x double> %f, metadata !"fpexcept.strict") + ret <4 x double> %res +} + +define <8 x float> @ffloorv8f32(<8 x float> %f) 
#0 { +; CHECK-LABEL: ffloorv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vroundps $9, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %res = call <8 x float> @llvm.experimental.constrained.floor.v8f32( + <8 x float> %f, metadata !"fpexcept.strict") + ret <8 x float> %res +} + +define <4 x double> @ffloorv4f64(<4 x double> %f) #0 { +; CHECK-LABEL: ffloorv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vroundpd $9, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %res = call <4 x double> @llvm.experimental.constrained.floor.v4f64( + <4 x double> %f, metadata !"fpexcept.strict") + ret <4 x double> %res +} + + +define <8 x float> @ftruncv8f32(<8 x float> %f) #0 { +; CHECK-LABEL: ftruncv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vroundps $11, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %res = call <8 x float> @llvm.experimental.constrained.trunc.v8f32( + <8 x float> %f, metadata !"fpexcept.strict") + ret <8 x float> %res +} + +define <4 x double> @ftruncv4f64(<4 x double> %f) #0 { +; CHECK-LABEL: ftruncv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vroundpd $11, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %res = call <4 x double> @llvm.experimental.constrained.trunc.v4f64( + <4 x double> %f, metadata !"fpexcept.strict") + ret <4 x double> %res +} + + +define <8 x float> @frintv8f32(<8 x float> %f) #0 { +; CHECK-LABEL: frintv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vroundps $4, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %res = call <8 x float> @llvm.experimental.constrained.rint.v8f32( + <8 x float> %f, + metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <8 x float> %res +} + +define <4 x double> @frintv4f64(<4 x double> %f) #0 { +; CHECK-LABEL: frintv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vroundpd $4, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %res = call <4 x double> @llvm.experimental.constrained.rint.v4f64( + <4 x double> %f, + metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <4 x double> %res +} + + +define <8 x float> @fnearbyintv8f32(<8 x float> %f) #0 { +; CHECK-LABEL: 
fnearbyintv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vroundps $12, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %res = call <8 x float> @llvm.experimental.constrained.nearbyint.v8f32( + <8 x float> %f, + metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <8 x float> %res +} + +define <4 x double> @fnearbyintv4f64(<4 x double> %f) #0 { +; CHECK-LABEL: fnearbyintv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vroundpd $12, %ymm0, %ymm0 +; CHECK-NEXT: ret{{[l|q]}} + %res = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64( + <4 x double> %f, + metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <4 x double> %res +} + attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/X86/vec-strict-512.ll b/llvm/test/CodeGen/X86/vec-strict-512.ll --- a/llvm/test/CodeGen/X86/vec-strict-512.ll +++ b/llvm/test/CodeGen/X86/vec-strict-512.ll @@ -16,6 +16,17 @@ declare <8 x float> @llvm.experimental.constrained.fptrunc.v8f32.v8f64(<8 x double>, metadata, metadata) declare <8 x double> @llvm.experimental.constrained.fma.v8f64(<8 x double>, <8 x double>, <8 x double>, metadata, metadata) declare <16 x float> @llvm.experimental.constrained.fma.v16f32(<16 x float>, <16 x float>, <16 x float>, metadata, metadata) +declare <16 x float> @llvm.experimental.constrained.ceil.v16f32(<16 x float>, metadata) +declare <8 x double> @llvm.experimental.constrained.ceil.v8f64(<8 x double>, metadata) +declare <16 x float> @llvm.experimental.constrained.floor.v16f32(<16 x float>, metadata) +declare <8 x double> @llvm.experimental.constrained.floor.v8f64(<8 x double>, metadata) +declare <16 x float> @llvm.experimental.constrained.trunc.v16f32(<16 x float>, metadata) +declare <8 x double> @llvm.experimental.constrained.trunc.v8f64(<8 x double>, metadata) +declare <16 x float> @llvm.experimental.constrained.rint.v16f32(<16 x float>, metadata, metadata) +declare <8 x double> @llvm.experimental.constrained.rint.v8f64(<8 x double>, metadata, metadata) +declare <16 x float> 
@llvm.experimental.constrained.nearbyint.v16f32(<16 x float>, metadata, metadata) +declare <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double>, metadata, metadata) + define <8 x double> @f1(<8 x double> %a, <8 x double> %b) #0 { ; CHECK-LABEL: f1: @@ -175,4 +186,98 @@ ret <8 x double> %res } +define <16 x float> @strict_vector_fceil_v16f32(<16 x float> %f) #0 { +; CHECK-LABEL: strict_vector_fceil_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrndscaleps $10, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %res = call <16 x float> @llvm.experimental.constrained.ceil.v16f32(<16 x float> %f, metadata !"fpexcept.strict") + ret <16 x float> %res +} + +define <8 x double> @strict_vector_fceil_v8f64(<8 x double> %f) #0 { +; CHECK-LABEL: strict_vector_fceil_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vrndscalepd $10, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %res = call <8 x double> @llvm.experimental.constrained.ceil.v8f64(<8 x double> %f, metadata !"fpexcept.strict") + ret <8 x double> %res +} + +define <16 x float> @strict_vector_ffloor_v16f32(<16 x float> %f) #0 { +; CHECK-LABEL: strict_vector_ffloor_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrndscaleps $9, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %res = call <16 x float> @llvm.experimental.constrained.floor.v16f32(<16 x float> %f, metadata !"fpexcept.strict") + ret <16 x float> %res +} + +define <8 x double> @strict_vector_ffloor_v8f64(<8 x double> %f) #0 { +; CHECK-LABEL: strict_vector_ffloor_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vrndscalepd $9, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %res = call <8 x double> @llvm.experimental.constrained.floor.v8f64(<8 x double> %f, metadata !"fpexcept.strict") + ret <8 x double> %res +} + +define <16 x float> @strict_vector_ftrunc_v16f32(<16 x float> %f) #0 { +; CHECK-LABEL: strict_vector_ftrunc_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrndscaleps $11, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %res = call <16 x float> 
@llvm.experimental.constrained.trunc.v16f32(<16 x float> %f, metadata !"fpexcept.strict") + ret <16 x float> %res +} + +define <8 x double> @strict_vector_ftrunc_v8f64(<8 x double> %f) #0 { +; CHECK-LABEL: strict_vector_ftrunc_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vrndscalepd $11, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %res = call <8 x double> @llvm.experimental.constrained.trunc.v8f64(<8 x double> %f, metadata !"fpexcept.strict") + ret <8 x double> %res +} + +define <16 x float> @strict_vector_frint_v16f32(<16 x float> %f) #0 { +; CHECK-LABEL: strict_vector_frint_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrndscaleps $4, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %res = call <16 x float> @llvm.experimental.constrained.rint.v16f32(<16 x float> %f, + metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <16 x float> %res +} + +define <8 x double> @strict_vector_frint_v8f64(<8 x double> %f) #0 { +; CHECK-LABEL: strict_vector_frint_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vrndscalepd $4, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %res = call <8 x double> @llvm.experimental.constrained.rint.v8f64(<8 x double> %f, + metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <8 x double> %res +} + +define <16 x float> @strict_vector_fnearbyint_v16f32(<16 x float> %f) #0 { +; CHECK-LABEL: strict_vector_fnearbyint_v16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vrndscaleps $12, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %res = call <16 x float> @llvm.experimental.constrained.nearbyint.v16f32(<16 x float> %f, + metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <16 x float> %res +} + +define <8 x double> @strict_vector_fnearbyint_v8f64(<8 x double> %f) #0 { +; CHECK-LABEL: strict_vector_fnearbyint_v8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vrndscalepd $12, %zmm0, %zmm0 +; CHECK-NEXT: ret{{[l|q]}} + %res = call <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double> %f, + metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <8 x 
double> %res +} + attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/X86/vec-strict-round-128.ll b/llvm/test/CodeGen/X86/vec-strict-round-128.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/X86/vec-strict-round-128.ll @@ -0,0 +1,174 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=SSE41 +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX +; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -O3 -disable-strictnode-mutation | FileCheck %s --check-prefixes=AVX + +declare <4 x float> @llvm.experimental.constrained.ceil.v4f32(<4 x float>, metadata) +declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata) +declare <4 x float> @llvm.experimental.constrained.floor.v4f32(<4 x float>, metadata) +declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata) +declare <4 x float> @llvm.experimental.constrained.trunc.v4f32(<4 x float>, metadata) +declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata) +declare <4 x float> @llvm.experimental.constrained.rint.v4f32(<4 x float>, metadata, metadata) +declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata) +declare <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float>, metadata, metadata) +declare <2 x double> 
@llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata) + +define <4 x float> @fceilv4f32(<4 x float> %f) #0 { +; SSE41-LABEL: fceilv4f32: +; SSE41: # %bb.0: +; SSE41-NEXT: roundps $10, %xmm0, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: fceilv4f32: +; AVX: # %bb.0: +; AVX-NEXT: vroundps $10, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %res = call <4 x float> @llvm.experimental.constrained.ceil.v4f32( + <4 x float> %f, metadata !"fpexcept.strict") + ret <4 x float> %res +} + +define <2 x double> @fceilv2f64(<2 x double> %f) #0 { +; SSE41-LABEL: fceilv2f64: +; SSE41: # %bb.0: +; SSE41-NEXT: roundpd $10, %xmm0, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: fceilv2f64: +; AVX: # %bb.0: +; AVX-NEXT: vroundpd $10, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %res = call <2 x double> @llvm.experimental.constrained.ceil.v2f64( + <2 x double> %f, metadata !"fpexcept.strict") + ret <2 x double> %res +} + +define <4 x float> @ffloorv4f32(<4 x float> %f) #0 { +; SSE41-LABEL: ffloorv4f32: +; SSE41: # %bb.0: +; SSE41-NEXT: roundps $9, %xmm0, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: ffloorv4f32: +; AVX: # %bb.0: +; AVX-NEXT: vroundps $9, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %res = call <4 x float> @llvm.experimental.constrained.floor.v4f32( + <4 x float> %f, metadata !"fpexcept.strict") + ret <4 x float> %res +} + +define <2 x double> @ffloorv2f64(<2 x double> %f) #0 { +; SSE41-LABEL: ffloorv2f64: +; SSE41: # %bb.0: +; SSE41-NEXT: roundpd $9, %xmm0, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: ffloorv2f64: +; AVX: # %bb.0: +; AVX-NEXT: vroundpd $9, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %res = call <2 x double> @llvm.experimental.constrained.floor.v2f64( + <2 x double> %f, metadata !"fpexcept.strict") + ret <2 x double> %res +} + +define <4 x float> @ftruncv4f32(<4 x float> %f) #0 { +; SSE41-LABEL: ftruncv4f32: +; SSE41: # %bb.0: +; SSE41-NEXT: roundps $11, %xmm0, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: ftruncv4f32: 
+; AVX: # %bb.0: +; AVX-NEXT: vroundps $11, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %res = call <4 x float> @llvm.experimental.constrained.trunc.v4f32( + <4 x float> %f, metadata !"fpexcept.strict") + ret <4 x float> %res +} + +define <2 x double> @ftruncv2f64(<2 x double> %f) #0 { +; SSE41-LABEL: ftruncv2f64: +; SSE41: # %bb.0: +; SSE41-NEXT: roundpd $11, %xmm0, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: ftruncv2f64: +; AVX: # %bb.0: +; AVX-NEXT: vroundpd $11, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %res = call <2 x double> @llvm.experimental.constrained.trunc.v2f64( + <2 x double> %f, metadata !"fpexcept.strict") + ret <2 x double> %res +} + +define <4 x float> @frintv4f32(<4 x float> %f) #0 { +; SSE41-LABEL: frintv4f32: +; SSE41: # %bb.0: +; SSE41-NEXT: roundps $4, %xmm0, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: frintv4f32: +; AVX: # %bb.0: +; AVX-NEXT: vroundps $4, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %res = call <4 x float> @llvm.experimental.constrained.rint.v4f32( + <4 x float> %f, + metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <4 x float> %res +} + +define <2 x double> @frintv2f64(<2 x double> %f) #0 { +; SSE41-LABEL: frintv2f64: +; SSE41: # %bb.0: +; SSE41-NEXT: roundpd $4, %xmm0, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: frintv2f64: +; AVX: # %bb.0: +; AVX-NEXT: vroundpd $4, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %res = call <2 x double> @llvm.experimental.constrained.rint.v2f64( + <2 x double> %f, + metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <2 x double> %res +} + +define <4 x float> @fnearbyintv4f32(<4 x float> %f) #0 { +; SSE41-LABEL: fnearbyintv4f32: +; SSE41: # %bb.0: +; SSE41-NEXT: roundps $12, %xmm0, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: fnearbyintv4f32: +; AVX: # %bb.0: +; AVX-NEXT: vroundps $12, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %res = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32( + <4 x float> %f, + metadata !"round.dynamic", metadata 
!"fpexcept.strict") + ret <4 x float> %res +} + +define <2 x double> @fnearbyintv2f64(<2 x double> %f) #0 { +; SSE41-LABEL: fnearbyintv2f64: +; SSE41: # %bb.0: +; SSE41-NEXT: roundpd $12, %xmm0, %xmm0 +; SSE41-NEXT: ret{{[l|q]}} +; +; AVX-LABEL: fnearbyintv2f64: +; AVX: # %bb.0: +; AVX-NEXT: vroundpd $12, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} + %res = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64( + <2 x double> %f, + metadata !"round.dynamic", metadata !"fpexcept.strict") + ret <2 x double> %res +} + +attributes #0 = { strictfp }