Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2846,6 +2846,12 @@
   /// passed to the fp16 to fp conversion library function.
   virtual bool shouldKeepZExtForFP16Conv() const { return false; }
 
+  /// Should we generate fp_to_si_sat and fp_to_ui_sat from type FPVT to type VT
+  /// from min(max(fptoi)) saturation patterns.
+  virtual bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const {
+    return isOperationLegalOrCustom(Op, VT);
+  }
+
   //===--------------------------------------------------------------------===//
   // Runtime Library hooks
   //
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -4783,6 +4783,111 @@
   return SDValue();
 }
 
+// Function to calculate whether the Min/Max pair of SDNodes (potentially
+// swapped around) make a signed saturate pattern, clamping to between -2^(BW-1)
+// and 2^(BW-1)-1. Returns the node being clamped and the bitwidth of the clamp
+// in BW. Should work with both SMIN/SMAX nodes and setcc/select combo. The
+// operands are the same as SimplifySelectCC. N0<N1 ? N2 : N3
+static SDValue IsSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
+                                  SDValue N3, ISD::CondCode CC, unsigned &BW) {
+  auto isConstantOrConstantSplatVector = [](SDValue N, APInt &I) {
+    if (auto *C = dyn_cast<ConstantSDNode>(N.getNode())) {
+      I = C->getAPIntValue();
+      return true;
+    }
+    return ISD::isConstantSplatVector(N.getNode(), I);
+  };
+  auto IsSignedMinMax = [&](SDValue N0, SDValue N1, SDValue N2, SDValue N3,
+                            ISD::CondCode CC) {
+    // The compare and select operand should be the same or the select operands
+    // should be truncated versions of the comparison.
+    if (N0 != N2 && (N2.getOpcode() != ISD::TRUNCATE || N0 != N2.getOperand(0)))
+      return 0;
+    // The constants need to be the same or a truncated version of each other.
+    APInt C1, C2;
+    if (!isConstantOrConstantSplatVector(N1, C1) ||
+        !isConstantOrConstantSplatVector(N3, C2) ||
+        C1.getBitWidth() < C2.getBitWidth() ||
+        C1 != C2.sextOrSelf(C1.getBitWidth()))
+      return 0;
+    return CC == ISD::SETLT ? ISD::SMIN : (CC == ISD::SETGT ? ISD::SMAX : 0);
+  };
+
+  // Check the initial value is a SMIN/SMAX equivalent.
+  unsigned Opcode0 = IsSignedMinMax(N0, N1, N2, N3, CC);
+  if (!Opcode0)
+    return SDValue();
+
+  SDValue N00, N01, N02, N03;
+  ISD::CondCode N0CC;
+  switch (N0.getOpcode()) {
+  case ISD::SMIN:
+  case ISD::SMAX:
+    N00 = N02 = N0.getOperand(0);
+    N01 = N03 = N0.getOperand(1);
+    N0CC = N0.getOpcode() == ISD::SMIN ? ISD::SETLT : ISD::SETGT;
+    break;
+  case ISD::SELECT_CC:
+    N00 = N0.getOperand(0);
+    N01 = N0.getOperand(1);
+    N02 = N0.getOperand(2);
+    N03 = N0.getOperand(3);
+    N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
+    break;
+  case ISD::SELECT:
+  case ISD::VSELECT:
+    if (N0.getOperand(0).getOpcode() != ISD::SETCC)
+      return SDValue();
+    N00 = N0.getOperand(0).getOperand(0);
+    N01 = N0.getOperand(0).getOperand(1);
+    N02 = N0.getOperand(1);
+    N03 = N0.getOperand(2);
+    N0CC = cast<CondCodeSDNode>(N0.getOperand(0).getOperand(2))->get();
+    break;
+  default:
+    return SDValue();
+  }
+
+  unsigned Opcode1 = IsSignedMinMax(N00, N01, N02, N03, N0CC);
+  if (!Opcode1 || Opcode0 == Opcode1)
+    return SDValue();
+  SDValue MinCOp = Opcode0 == ISD::SMIN ? N1 : N01;
+  SDValue MaxCOp = Opcode0 == ISD::SMIN ? N01 : N1;
+
+  APInt MinC, MaxC;
+  if (!isConstantOrConstantSplatVector(MinCOp, MinC) ||
+      !isConstantOrConstantSplatVector(MaxCOp, MaxC) || -MaxC - 1 != MinC ||
+      !(MinC + 1).isPowerOf2())
+    return SDValue();
+  BW = (MinC + 1).exactLogBase2() + 1;
+  return N02;
+}
+
+// Fold a signed clamp of an FP_TO_SINT result, as matched by
+// IsSaturatingMinMax, into FP_TO_SINT_SAT at the clamp's bitwidth when the
+// target's shouldConvertFpToSat hook accepts it. The saturated value is then
+// sign-extended or truncated to the type of N2.
+static SDValue PerformMinMaxFpToSatCombine(SDValue N0, SDValue N1, SDValue N2,
+                                           SDValue N3, ISD::CondCode CC,
+                                           SelectionDAG &DAG) {
+  unsigned BW;
+  SDValue Fp = IsSaturatingMinMax(N0, N1, N2, N3, CC, BW);
+  if (!Fp || Fp.getOpcode() != ISD::FP_TO_SINT)
+    return SDValue();
+  EVT FPVT = Fp.getOperand(0).getValueType();
+  EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), BW);
+  if (FPVT.isVector())
+    NewVT = EVT::getVectorVT(*DAG.getContext(), NewVT,
+                             FPVT.getVectorElementCount());
+  if (!DAG.getTargetLoweringInfo().shouldConvertFpToSat(
+          ISD::FP_TO_SINT_SAT, Fp.getOperand(0).getValueType(), NewVT))
+    return SDValue();
+  SDLoc DL(Fp);
+  SDValue Sat = DAG.getNode(ISD::FP_TO_SINT_SAT, DL, NewVT, Fp.getOperand(0),
+                            DAG.getValueType(NewVT.getScalarType()));
+  return DAG.getSExtOrTrunc(Sat, DL, N2->getValueType(0));
+}
+
 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -4820,6 +4925,11 @@
       return DAG.getNode(AltOpcode, SDLoc(N), VT, N0, N1);
   }
 
+  if (Opcode == ISD::SMIN || Opcode == ISD::SMAX)
+    if (SDValue S = PerformMinMaxFpToSatCombine(
+            N0, N1, N0, N1, Opcode == ISD::SMIN ? ISD::SETLT : ISD::SETGT, DAG))
+      return S;
+
   // Simplify the operands using demanded-bits information.
   if (SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
@@ -10004,6 +10114,9 @@
         return FMinMax;
     }
 
+    if (SDValue S = PerformMinMaxFpToSatCombine(LHS, RHS, N1, N2, CC, DAG))
+      return S;
+
     // If this select has a condition (setcc) with narrower operands than the
     // select, try to widen the compare to match the select width.
     // TODO: This should be extended to handle any constant.
@@ -22950,6 +23063,9 @@
                          DAG.getSExtOrTrunc(CC == ISD::SETLT ? N3 : N2, DL, VT));
   }
 
+  if (SDValue S = PerformMinMaxFpToSatCombine(N0, N1, N2, N3, CC, DAG))
+    return S;
+
   return SDValue();
 }
 
Index: llvm/lib/Target/ARM/ARMISelLowering.h
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.h
+++ llvm/lib/Target/ARM/ARMISelLowering.h
@@ -736,6 +736,8 @@
 
     bool preferIncOfAddToSubOfNot(EVT VT) const override;
 
+    bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
+
   protected:
     std::pair<const TargetRegisterClass *, uint8_t>
     findRepresentativeClass(const TargetRegisterInfo *TRI,
Index: llvm/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -13439,6 +13439,29 @@
   return VT.isScalarInteger();
 }
 
+// Accept the saturating conversion only when Op is legal or custom for VT and
+// the subtarget feature checked below for the source type is present: VFP2
+// for f16/f32, FP64 for f64, MVE float ops for v4f32/v8f16.
+bool ARMTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
+                                             EVT VT) const {
+  if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
+    return false;
+
+  switch (FPVT.getSimpleVT().SimpleTy) {
+  case MVT::f16:
+    return Subtarget->hasVFP2Base();
+  case MVT::f32:
+    return Subtarget->hasVFP2Base();
+  case MVT::f64:
+    return Subtarget->hasFP64();
+  case MVT::v4f32:
+  case MVT::v8f16:
+    return Subtarget->hasMVEFloatOps();
+  default:
+    return false;
+  }
+}
+
 static SDValue PerformSHLSimplify(SDNode *N,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const ARMSubtarget *ST) {
Index: llvm/lib/Target/RISCV/RISCVISelLowering.h
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -513,6 +513,8 @@
 
   bool isLegalElementTypeForRVV(Type *ScalarTy) const;
 
+  bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override;
+
 private:
   /// RISCVCCAssignFn - This target-specific function extends the default
   /// CCValAssign with additional information used to lower RISC-V calling
Index: llvm/lib/Target/RISCV/RISCVISelLowering.cpp
===================================================================
--- llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -9679,6 +9679,26 @@
   return false;
 }
 
+// Accept the saturating conversion only when Op is legal or custom for VT and
+// the extension checked below for the scalar source type is present: Zfh for
+// f16, F for f32, D for f64. Every other source type is rejected.
+bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
+                                               EVT VT) const {
+  if (!isOperationLegalOrCustom(Op, VT) || !FPVT.isSimple())
+    return false;
+
+  switch (FPVT.getSimpleVT().SimpleTy) {
+  case MVT::f16:
+    return Subtarget.hasStdExtZfh();
+  case MVT::f32:
+    return Subtarget.hasStdExtF();
+  case MVT::f64:
+    return Subtarget.hasStdExtD();
+  default:
+    return false;
+  }
+}
+
 bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                      EVT VT) const {
   VT = VT.getScalarType();
Index: llvm/test/CodeGen/AArch64/fpclamptosat.ll
===================================================================
--- llvm/test/CodeGen/AArch64/fpclamptosat.ll
+++ llvm/test/CodeGen/AArch64/fpclamptosat.ll
@@ -7,14 +7,7 @@
 define i32 @stest_f64i32(double %x) {
 ; CHECK-LABEL: stest_f64i32:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzs x8, d0
-; CHECK-NEXT: mov w9, #2147483647
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: csel x8, x8, x9, lt
-; CHECK-NEXT: mov x9, #-2147483648
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w9, #-2147483648
-; CHECK-NEXT: csel w0, w8, w9, gt
+; CHECK-NEXT: fcvtzs w0, d0
 ; CHECK-NEXT: ret
 entry:
   %conv = fptosi double %x to i64
@@ -65,14 +58,7 @@
 define i32 @stest_f32i32(float %x) {
 ; CHECK-LABEL: stest_f32i32:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcvtzs x8, s0
-; CHECK-NEXT: mov w9, #2147483647
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: csel x8, x8, x9, lt
-; CHECK-NEXT: mov x9, #-2147483648
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: mov w9, #-2147483648
-; CHECK-NEXT: csel w0, w8, w9, gt
+; CHECK-NEXT: fcvtzs w0, s0
 ; CHECK-NEXT: ret
 entry:
   %conv = fptosi float %x to i64
@@ -124,26 +110,12 @@
 ; CHECK-CVT-LABEL: stest_f16i32:
 ; CHECK-CVT: // %bb.0: // %entry
 ; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov w9, 
#2147483647 -; CHECK-CVT-NEXT: fcvtzs x8, s0 -; CHECK-CVT-NEXT: cmp x8, x9 -; CHECK-CVT-NEXT: csel x8, x8, x9, lt -; CHECK-CVT-NEXT: mov x9, #-2147483648 -; CHECK-CVT-NEXT: cmp x8, x9 -; CHECK-CVT-NEXT: mov w9, #-2147483648 -; CHECK-CVT-NEXT: csel w0, w8, w9, gt +; CHECK-CVT-NEXT: fcvtzs w0, s0 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: stest_f16i32: ; CHECK-FP16: // %bb.0: // %entry -; CHECK-FP16-NEXT: fcvtzs x8, h0 -; CHECK-FP16-NEXT: mov w9, #2147483647 -; CHECK-FP16-NEXT: cmp x8, x9 -; CHECK-FP16-NEXT: csel x8, x8, x9, lt -; CHECK-FP16-NEXT: mov x9, #-2147483648 -; CHECK-FP16-NEXT: cmp x8, x9 -; CHECK-FP16-NEXT: mov w9, #-2147483648 -; CHECK-FP16-NEXT: csel w0, w8, w9, gt +; CHECK-FP16-NEXT: fcvtzs w0, h0 ; CHECK-FP16-NEXT: ret entry: %conv = fptosi half %x to i64 @@ -417,29 +389,7 @@ define i64 @stest_f64i64(double %x) { ; CHECK-LABEL: stest_f64i64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: mov x8, #9223372036854775807 -; CHECK-NEXT: cmp x0, x8 -; CHECK-NEXT: cset w9, lo -; CHECK-NEXT: cmp x1, #0 -; CHECK-NEXT: cset w10, lt -; CHECK-NEXT: csel w9, w9, w10, eq -; CHECK-NEXT: mov x10, #-9223372036854775808 -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: csel x9, x1, xzr, ne -; CHECK-NEXT: csel x8, x0, x8, ne -; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: cset w11, ge -; CHECK-NEXT: cmp x8, x10 -; CHECK-NEXT: cset w12, hi -; CHECK-NEXT: cmn x9, #1 -; CHECK-NEXT: csel w9, w12, w11, eq -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: csel x0, x8, x10, ne -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: fcvtzs x0, d0 ; CHECK-NEXT: ret entry: %conv = fptosi double %x to i128 @@ -502,29 +452,7 @@ define i64 @stest_f32i64(float %x) { ; CHECK-LABEL: stest_f32i64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov x8, #9223372036854775807 -; CHECK-NEXT: cmp x0, x8 -; CHECK-NEXT: cset w9, lo -; CHECK-NEXT: cmp x1, #0 -; CHECK-NEXT: cset w10, lt -; CHECK-NEXT: csel w9, w9, w10, eq -; CHECK-NEXT: mov x10, #-9223372036854775808 -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: csel x9, x1, xzr, ne -; CHECK-NEXT: csel x8, x0, x8, ne -; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: cset w11, ge -; CHECK-NEXT: cmp x8, x10 -; CHECK-NEXT: cset w12, hi -; CHECK-NEXT: cmn x9, #1 -; CHECK-NEXT: csel w9, w12, w11, eq -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: csel x0, x8, x10, ne -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: fcvtzs x0, s0 ; CHECK-NEXT: ret entry: %conv = fptosi float %x to i128 @@ -585,32 +513,16 @@ } define i64 @stest_f16i64(half %x) { -; CHECK-LABEL: stest_f16i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: mov x8, #9223372036854775807 -; CHECK-NEXT: cmp x0, x8 -; CHECK-NEXT: cset w9, lo -; CHECK-NEXT: cmp x1, #0 -; CHECK-NEXT: cset w10, lt -; CHECK-NEXT: csel w9, w9, w10, eq -; CHECK-NEXT: mov x10, #-9223372036854775808 -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: csel x9, x1, xzr, ne -; CHECK-NEXT: csel x8, x0, x8, ne -; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: cset w11, ge -; CHECK-NEXT: cmp x8, x10 -; CHECK-NEXT: cset w12, hi -; CHECK-NEXT: cmn x9, #1 -; CHECK-NEXT: csel w9, w12, w11, eq -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: csel x0, x8, x10, ne -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: stest_f16i64: +; CHECK-CVT: // %bb.0: // %entry +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzs x0, s0 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: stest_f16i64: +; CHECK-FP16: // %bb.0: // %entry +; 
CHECK-FP16-NEXT: fcvtzs x0, h0 +; CHECK-FP16-NEXT: ret entry: %conv = fptosi half %x to i128 %0 = icmp slt i128 %conv, 9223372036854775807 @@ -676,14 +588,7 @@ define i32 @stest_f64i32_mm(double %x) { ; CHECK-LABEL: stest_f64i32_mm: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzs x8, d0 -; CHECK-NEXT: mov w9, #2147483647 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: csel x8, x8, x9, lt -; CHECK-NEXT: mov x9, #-2147483648 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: csel x0, x8, x9, gt -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: fcvtzs w0, d0 ; CHECK-NEXT: ret entry: %conv = fptosi double %x to i64 @@ -731,14 +636,7 @@ define i32 @stest_f32i32_mm(float %x) { ; CHECK-LABEL: stest_f32i32_mm: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtzs x8, s0 -; CHECK-NEXT: mov w9, #2147483647 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: csel x8, x8, x9, lt -; CHECK-NEXT: mov x9, #-2147483648 -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: csel x0, x8, x9, gt -; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: fcvtzs w0, s0 ; CHECK-NEXT: ret entry: %conv = fptosi float %x to i64 @@ -787,26 +685,12 @@ ; CHECK-CVT-LABEL: stest_f16i32_mm: ; CHECK-CVT: // %bb.0: // %entry ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov w9, #2147483647 -; CHECK-CVT-NEXT: fcvtzs x8, s0 -; CHECK-CVT-NEXT: cmp x8, x9 -; CHECK-CVT-NEXT: csel x8, x8, x9, lt -; CHECK-CVT-NEXT: mov x9, #-2147483648 -; CHECK-CVT-NEXT: cmp x8, x9 -; CHECK-CVT-NEXT: csel x0, x8, x9, gt -; CHECK-CVT-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-CVT-NEXT: fcvtzs w0, s0 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: stest_f16i32_mm: ; CHECK-FP16: // %bb.0: // %entry -; CHECK-FP16-NEXT: fcvtzs x8, h0 -; CHECK-FP16-NEXT: mov w9, #2147483647 -; CHECK-FP16-NEXT: cmp x8, x9 -; CHECK-FP16-NEXT: csel x8, x8, x9, lt -; CHECK-FP16-NEXT: mov x9, #-2147483648 -; CHECK-FP16-NEXT: cmp x8, x9 -; CHECK-FP16-NEXT: csel x0, x8, x9, gt -; CHECK-FP16-NEXT: // kill: def $w0 killed $w0 killed $x0 +; CHECK-FP16-NEXT: 
fcvtzs w0, h0 ; CHECK-FP16-NEXT: ret entry: %conv = fptosi half %x to i64 @@ -1068,25 +952,7 @@ define i64 @stest_f64i64_mm(double %x) { ; CHECK-LABEL: stest_f64i64_mm: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: mov x8, #9223372036854775807 -; CHECK-NEXT: cmp x0, x8 -; CHECK-NEXT: csel x9, x0, x8, lo -; CHECK-NEXT: cmp x1, #0 -; CHECK-NEXT: csel x8, x0, x8, lt -; CHECK-NEXT: csel x10, x1, xzr, lt -; CHECK-NEXT: csel x8, x9, x8, eq -; CHECK-NEXT: cmp x10, #0 -; CHECK-NEXT: mov x9, #-9223372036854775808 -; CHECK-NEXT: csel x11, x8, x9, ge -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: csel x8, x8, x9, hi -; CHECK-NEXT: cmn x10, #1 -; CHECK-NEXT: csel x0, x8, x11, eq -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: fcvtzs x0, d0 ; CHECK-NEXT: ret entry: %conv = fptosi double %x to i128 @@ -1143,25 +1009,7 @@ define i64 @stest_f32i64_mm(float %x) { ; CHECK-LABEL: stest_f32i64_mm: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov x8, #9223372036854775807 -; CHECK-NEXT: cmp x0, x8 -; CHECK-NEXT: csel x9, x0, x8, lo -; CHECK-NEXT: cmp x1, #0 -; CHECK-NEXT: csel x8, x0, x8, lt -; CHECK-NEXT: csel x10, x1, xzr, lt -; CHECK-NEXT: csel x8, x9, x8, eq -; CHECK-NEXT: cmp x10, #0 -; CHECK-NEXT: mov x9, #-9223372036854775808 -; CHECK-NEXT: csel x11, x8, x9, ge -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: csel x8, x8, x9, hi -; CHECK-NEXT: cmn x10, #1 -; CHECK-NEXT: csel x0, x8, x11, eq -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-NEXT: fcvtzs x0, s0 ; CHECK-NEXT: ret entry: %conv = fptosi float %x to i128 @@ -1216,28 +1064,16 @@ } define i64 @stest_f16i64_mm(half %x) { -; CHECK-LABEL: stest_f16i64_mm: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: mov x8, #9223372036854775807 -; CHECK-NEXT: cmp x0, x8 -; CHECK-NEXT: csel x9, x0, x8, lo -; CHECK-NEXT: cmp x1, #0 -; CHECK-NEXT: csel x8, x0, x8, lt -; CHECK-NEXT: csel x10, x1, xzr, lt -; CHECK-NEXT: csel x8, x9, x8, eq -; CHECK-NEXT: cmp x10, #0 -; CHECK-NEXT: mov x9, #-9223372036854775808 -; CHECK-NEXT: csel x11, x8, x9, ge -; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: csel x8, x8, x9, hi -; CHECK-NEXT: cmn x10, #1 -; CHECK-NEXT: csel x0, x8, x11, eq -; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: stest_f16i64_mm: +; CHECK-CVT: // %bb.0: // %entry +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzs x0, s0 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: stest_f16i64_mm: +; CHECK-FP16: // %bb.0: // %entry +; CHECK-FP16-NEXT: fcvtzs x0, h0 +; CHECK-FP16-NEXT: ret entry: %conv = fptosi half %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807) Index: 
llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll =================================================================== --- llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll +++ llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll @@ -7,16 +7,12 @@ define <2 x i32> @stest_f64i32(<2 x double> %x) { ; CHECK-LABEL: stest_f64i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #2147483647 -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: dup v1.2d, x8 -; CHECK-NEXT: mov x8, #-2147483648 -; CHECK-NEXT: cmgt v2.2d, v1.2d, v0.2d -; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b -; CHECK-NEXT: dup v1.2d, x8 -; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d -; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b -; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: mov d1, v0.d[1] +; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: fcvtzs w8, d1 +; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %conv = fptosi <2 x double> %x to <2 x i64> @@ -70,24 +66,7 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) { ; CHECK-LABEL: stest_f32i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtl v1.2d, v0.2s -; CHECK-NEXT: fcvtl2 v0.2d, v0.4s -; CHECK-NEXT: mov w8, #2147483647 -; CHECK-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: dup v2.2d, x8 -; CHECK-NEXT: mov x8, #-2147483648 -; CHECK-NEXT: cmgt v3.2d, v2.2d, v1.2d -; CHECK-NEXT: cmgt v4.2d, v2.2d, v0.2d -; CHECK-NEXT: bif v1.16b, v2.16b, v3.16b -; CHECK-NEXT: bif v0.16b, v2.16b, v4.16b -; CHECK-NEXT: dup v2.2d, x8 -; CHECK-NEXT: cmgt v3.2d, v1.2d, v2.2d -; CHECK-NEXT: cmgt v4.2d, v0.2d, v2.2d -; CHECK-NEXT: bif v1.16b, v2.16b, v3.16b -; CHECK-NEXT: bit v2.16b, v0.16b, v4.16b -; CHECK-NEXT: xtn v0.2s, v1.2d -; CHECK-NEXT: xtn2 v0.4s, v2.2d +; CHECK-NEXT: fcvtzs v0.4s, v0.4s ; CHECK-NEXT: ret entry: %conv = fptosi <4 x float> %x to <4 x i64> @@ -154,69 +133,11 @@ } define <4 x i32> @stest_f16i32(<4 x half> %x) { -; CHECK-CVT-LABEL: stest_f16i32: -; CHECK-CVT: // %bb.0: // %entry 
-; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-CVT-NEXT: mov h1, v0.h[2] -; CHECK-CVT-NEXT: mov h2, v0.h[1] -; CHECK-CVT-NEXT: fcvt s3, h0 -; CHECK-CVT-NEXT: mov h0, v0.h[3] -; CHECK-CVT-NEXT: mov w8, #2147483647 -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvtzs x9, s3 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvtzs x10, s1 -; CHECK-CVT-NEXT: fmov d1, x9 -; CHECK-CVT-NEXT: fcvtzs x9, s2 -; CHECK-CVT-NEXT: fmov d2, x10 -; CHECK-CVT-NEXT: fcvtzs x10, s0 -; CHECK-CVT-NEXT: mov v1.d[1], x9 -; CHECK-CVT-NEXT: dup v0.2d, x8 -; CHECK-CVT-NEXT: mov x8, #-2147483648 -; CHECK-CVT-NEXT: mov v2.d[1], x10 -; CHECK-CVT-NEXT: cmgt v3.2d, v0.2d, v1.2d -; CHECK-CVT-NEXT: cmgt v4.2d, v0.2d, v2.2d -; CHECK-CVT-NEXT: bif v1.16b, v0.16b, v3.16b -; CHECK-CVT-NEXT: bit v0.16b, v2.16b, v4.16b -; CHECK-CVT-NEXT: dup v2.2d, x8 -; CHECK-CVT-NEXT: cmgt v3.2d, v1.2d, v2.2d -; CHECK-CVT-NEXT: cmgt v4.2d, v0.2d, v2.2d -; CHECK-CVT-NEXT: bif v1.16b, v2.16b, v3.16b -; CHECK-CVT-NEXT: bit v2.16b, v0.16b, v4.16b -; CHECK-CVT-NEXT: xtn v0.2s, v1.2d -; CHECK-CVT-NEXT: xtn2 v0.4s, v2.2d -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: stest_f16i32: -; CHECK-FP16: // %bb.0: // %entry -; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-NEXT: mov h1, v0.h[2] -; CHECK-FP16-NEXT: mov h2, v0.h[1] -; CHECK-FP16-NEXT: fcvtzs x9, h0 -; CHECK-FP16-NEXT: mov h0, v0.h[3] -; CHECK-FP16-NEXT: mov w8, #2147483647 -; CHECK-FP16-NEXT: fcvtzs x10, h1 -; CHECK-FP16-NEXT: fmov d1, x9 -; CHECK-FP16-NEXT: fcvtzs x9, h2 -; CHECK-FP16-NEXT: fmov d2, x10 -; CHECK-FP16-NEXT: fcvtzs x10, h0 -; CHECK-FP16-NEXT: mov v1.d[1], x9 -; CHECK-FP16-NEXT: dup v0.2d, x8 -; CHECK-FP16-NEXT: mov x8, #-2147483648 -; CHECK-FP16-NEXT: mov v2.d[1], x10 -; CHECK-FP16-NEXT: cmgt v3.2d, v0.2d, v1.2d -; CHECK-FP16-NEXT: cmgt v4.2d, v0.2d, v2.2d -; CHECK-FP16-NEXT: bif v1.16b, v0.16b, v3.16b -; CHECK-FP16-NEXT: bit v0.16b, v2.16b, v4.16b -; CHECK-FP16-NEXT: dup v2.2d, x8 
-; CHECK-FP16-NEXT: cmgt v3.2d, v1.2d, v2.2d -; CHECK-FP16-NEXT: cmgt v4.2d, v0.2d, v2.2d -; CHECK-FP16-NEXT: bif v1.16b, v2.16b, v3.16b -; CHECK-FP16-NEXT: bit v2.16b, v0.16b, v4.16b -; CHECK-FP16-NEXT: xtn v0.2s, v1.2d -; CHECK-FP16-NEXT: xtn2 v0.4s, v2.2d -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: stest_f16i32: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: ret entry: %conv = fptosi <4 x half> %x to <4 x i64> %0 = icmp slt <4 x i64> %conv, @@ -466,19 +387,74 @@ } define <8 x i16> @stest_f16i16(<8 x half> %x) { -; CHECK-LABEL: stest_f16i16: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtl2 v2.4s, v0.8h -; CHECK-NEXT: fcvtl v0.4s, v0.4h -; CHECK-NEXT: movi v1.4s, #127, msl #8 -; CHECK-NEXT: mvni v3.4s, #127, msl #8 -; CHECK-NEXT: fcvtzs v2.4s, v2.4s -; CHECK-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s -; CHECK-NEXT: sqxtn v0.4h, v0.4s -; CHECK-NEXT: smax v1.4s, v1.4s, v3.4s -; CHECK-NEXT: xtn2 v0.8h, v1.4s -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: stest_f16i16: +; CHECK-CVT: // %bb.0: // %entry +; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-CVT-NEXT: mov w8, #32767 +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: mov w10, #-32768 +; CHECK-CVT-NEXT: mov s2, v1.s[1] +; CHECK-CVT-NEXT: fcvtzs w11, s1 +; CHECK-CVT-NEXT: fcvtzs w15, s0 +; CHECK-CVT-NEXT: fcvtzs w9, s2 +; CHECK-CVT-NEXT: mov s2, v1.s[2] +; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w9, w9, w8, lt +; CHECK-CVT-NEXT: fcvtzs w12, s2 +; CHECK-CVT-NEXT: cmn w9, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov s2, v0.s[1] +; CHECK-CVT-NEXT: csel w9, w9, w10, gt +; CHECK-CVT-NEXT: cmp w11, w8 +; CHECK-CVT-NEXT: csel w11, w11, w8, lt +; CHECK-CVT-NEXT: fcvtzs w13, s1 +; CHECK-CVT-NEXT: cmn w11, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov s1, v0.s[2] +; CHECK-CVT-NEXT: csel w11, w11, w10, gt +; CHECK-CVT-NEXT: cmp w12, w8 +; CHECK-CVT-NEXT: csel w12, w12, w8, lt +; CHECK-CVT-NEXT: 
fcvtzs w14, s2 +; CHECK-CVT-NEXT: cmn w12, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: csel w12, w12, w10, gt +; CHECK-CVT-NEXT: cmp w13, w8 +; CHECK-CVT-NEXT: csel w13, w13, w8, lt +; CHECK-CVT-NEXT: fcvtzs w16, s1 +; CHECK-CVT-NEXT: cmn w13, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: csel w13, w13, w10, gt +; CHECK-CVT-NEXT: cmp w14, w8 +; CHECK-CVT-NEXT: csel w14, w14, w8, lt +; CHECK-CVT-NEXT: cmn w14, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: csel w14, w14, w10, gt +; CHECK-CVT-NEXT: cmp w15, w8 +; CHECK-CVT-NEXT: csel w15, w15, w8, lt +; CHECK-CVT-NEXT: cmn w15, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: csel w15, w15, w10, gt +; CHECK-CVT-NEXT: cmp w16, w8 +; CHECK-CVT-NEXT: csel w11, w16, w8, lt +; CHECK-CVT-NEXT: cmn w11, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: fmov s1, w15 +; CHECK-CVT-NEXT: fcvtzs w15, s0 +; CHECK-CVT-NEXT: csel w11, w11, w10, gt +; CHECK-CVT-NEXT: mov v2.s[1], w9 +; CHECK-CVT-NEXT: mov v1.s[1], w14 +; CHECK-CVT-NEXT: cmp w15, w8 +; CHECK-CVT-NEXT: csel w8, w15, w8, lt +; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: csel w8, w8, w10, gt +; CHECK-CVT-NEXT: mov v1.s[2], w11 +; CHECK-CVT-NEXT: mov v2.s[2], w12 +; CHECK-CVT-NEXT: mov v1.s[3], w8 +; CHECK-CVT-NEXT: mov v2.s[3], w13 +; CHECK-CVT-NEXT: xtn v0.4h, v1.4s +; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: stest_f16i16: +; CHECK-FP16: // %bb.0: // %entry +; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: ret entry: %conv = fptosi <8 x half> %x to <8 x i32> %0 = icmp slt <8 x i32> %conv, @@ -540,61 +516,7 @@ define <2 x i64> @stest_f64i64(<2 x double> %x) { ; CHECK-LABEL: stest_f64i64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset 
w20, -16 -; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: mov d0, v0.d[1] -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: mov x8, #9223372036854775807 -; CHECK-NEXT: mov x12, #-9223372036854775808 -; CHECK-NEXT: cmp x0, x8 -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: cset w9, lo -; CHECK-NEXT: cmp x1, #0 -; CHECK-NEXT: cset w10, lt -; CHECK-NEXT: csel w9, w9, w10, eq -; CHECK-NEXT: cmp x19, x8 -; CHECK-NEXT: cset w10, lo -; CHECK-NEXT: cmp x20, #0 -; CHECK-NEXT: cset w11, lt -; CHECK-NEXT: csel w10, w10, w11, eq -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csel x10, x19, x8, ne -; CHECK-NEXT: csel x11, x20, xzr, ne -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: csel x9, x1, xzr, ne -; CHECK-NEXT: csel x8, x0, x8, ne -; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: cset w13, ge -; CHECK-NEXT: cmp x8, x12 -; CHECK-NEXT: cset w14, hi -; CHECK-NEXT: cmn x9, #1 -; CHECK-NEXT: csel w9, w14, w13, eq -; CHECK-NEXT: cmp x11, #0 -; CHECK-NEXT: cset w13, ge -; CHECK-NEXT: cmp x10, x12 -; CHECK-NEXT: cset w14, hi -; CHECK-NEXT: cmn x11, #1 -; CHECK-NEXT: csel w11, w14, w13, eq -; CHECK-NEXT: cmp w11, #0 -; CHECK-NEXT: csel x10, x10, x12, ne -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: csel x8, x8, x12, ne -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fmov d0, x10 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-NEXT: ret entry: %conv = fptosi <2 x double> %x to <2 x i128> @@ -701,62 +623,12 @@ define <2 x i64> @stest_f32i64(<2 x float> %x) { ; CHECK-LABEL: stest_f32i64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, 
#32] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w30, -32 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov x8, #9223372036854775807 -; CHECK-NEXT: mov x12, #-9223372036854775808 -; CHECK-NEXT: cmp x0, x8 -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: cset w9, lo -; CHECK-NEXT: cmp x1, #0 -; CHECK-NEXT: cset w10, lt -; CHECK-NEXT: csel w9, w9, w10, eq -; CHECK-NEXT: cmp x19, x8 -; CHECK-NEXT: cset w10, lo -; CHECK-NEXT: cmp x20, #0 -; CHECK-NEXT: cset w11, lt -; CHECK-NEXT: csel w10, w10, w11, eq -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csel x10, x19, x8, ne -; CHECK-NEXT: csel x11, x20, xzr, ne -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: csel x9, x1, xzr, ne -; CHECK-NEXT: csel x8, x0, x8, ne -; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: cset w13, ge -; CHECK-NEXT: cmp x8, x12 -; CHECK-NEXT: cset w14, hi -; CHECK-NEXT: cmn x9, #1 -; CHECK-NEXT: csel w9, w14, w13, eq -; CHECK-NEXT: cmp x11, #0 -; CHECK-NEXT: cset w13, ge -; CHECK-NEXT: cmp x10, x12 -; CHECK-NEXT: cset w14, hi -; CHECK-NEXT: cmn x11, #1 -; CHECK-NEXT: csel w11, w14, w13, eq -; CHECK-NEXT: cmp w11, #0 -; CHECK-NEXT: csel x10, x10, x12, ne -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: csel x8, x8, x12, ne -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fmov d0, x10 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: fcvtzs x8, s1 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret entry: 
%conv = fptosi <2 x float> %x to <2 x i128> @@ -863,65 +735,27 @@ } define <2 x i64> @stest_f16i64(<2 x half> %x) { -; CHECK-LABEL: stest_f16i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0 -; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: mov h0, v0.h[1] -; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: mov x8, #9223372036854775807 -; CHECK-NEXT: mov x12, #-9223372036854775808 -; CHECK-NEXT: cmp x0, x8 -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: cset w9, lo -; CHECK-NEXT: cmp x1, #0 -; CHECK-NEXT: cset w10, lt -; CHECK-NEXT: csel w9, w9, w10, eq -; CHECK-NEXT: cmp x19, x8 -; CHECK-NEXT: cset w10, lo -; CHECK-NEXT: cmp x20, #0 -; CHECK-NEXT: cset w11, lt -; CHECK-NEXT: csel w10, w10, w11, eq -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csel x10, x19, x8, ne -; CHECK-NEXT: csel x11, x20, xzr, ne -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: csel x9, x1, xzr, ne -; CHECK-NEXT: csel x8, x0, x8, ne -; CHECK-NEXT: cmp x9, #0 -; CHECK-NEXT: cset w13, ge -; CHECK-NEXT: cmp x8, x12 -; CHECK-NEXT: cset w14, hi -; CHECK-NEXT: cmn x9, #1 -; CHECK-NEXT: csel w9, w14, w13, eq -; CHECK-NEXT: cmp x11, #0 -; CHECK-NEXT: cset w13, ge -; CHECK-NEXT: cmp x10, x12 -; CHECK-NEXT: cset w14, hi -; CHECK-NEXT: cmn x11, #1 -; CHECK-NEXT: csel w11, w14, w13, eq -; CHECK-NEXT: cmp w11, #0 -; CHECK-NEXT: csel x10, x10, x12, ne -; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: csel x8, x8, x12, ne -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload 
-; CHECK-NEXT: fmov d0, x10 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: stest_f16i64: +; CHECK-CVT: // %bb.0: // %entry +; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-CVT-NEXT: mov h1, v0.h[1] +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvtzs x8, s0 +; CHECK-CVT-NEXT: fmov d0, x8 +; CHECK-CVT-NEXT: fcvtzs x8, s1 +; CHECK-CVT-NEXT: mov v0.d[1], x8 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: stest_f16i64: +; CHECK-FP16: // %bb.0: // %entry +; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: fcvtzs x8, h0 +; CHECK-FP16-NEXT: fmov d0, x8 +; CHECK-FP16-NEXT: fcvtzs x8, h1 +; CHECK-FP16-NEXT: mov v0.d[1], x8 +; CHECK-FP16-NEXT: ret entry: %conv = fptosi <2 x half> %x to <2 x i128> %0 = icmp slt <2 x i128> %conv, @@ -1033,16 +867,12 @@ define <2 x i32> @stest_f64i32_mm(<2 x double> %x) { ; CHECK-LABEL: stest_f64i32_mm: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #2147483647 -; CHECK-NEXT: fcvtzs v0.2d, v0.2d -; CHECK-NEXT: dup v1.2d, x8 -; CHECK-NEXT: mov x8, #-2147483648 -; CHECK-NEXT: cmgt v2.2d, v1.2d, v0.2d -; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b -; CHECK-NEXT: dup v1.2d, x8 -; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d -; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b -; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: mov d1, v0.d[1] +; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: fcvtzs w8, d1 +; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret entry: %conv = fptosi <2 x double> %x to <2 x i64> @@ -1091,24 +921,7 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) { ; CHECK-LABEL: stest_f32i32_mm: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtl v1.2d, v0.2s -; CHECK-NEXT: fcvtl2 v0.2d, v0.4s -; CHECK-NEXT: mov w8, #2147483647 -; CHECK-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-NEXT: fcvtzs v0.2d, v0.2d 
-; CHECK-NEXT: dup v2.2d, x8 -; CHECK-NEXT: mov x8, #-2147483648 -; CHECK-NEXT: cmgt v3.2d, v2.2d, v1.2d -; CHECK-NEXT: cmgt v4.2d, v2.2d, v0.2d -; CHECK-NEXT: bif v1.16b, v2.16b, v3.16b -; CHECK-NEXT: bif v0.16b, v2.16b, v4.16b -; CHECK-NEXT: dup v2.2d, x8 -; CHECK-NEXT: cmgt v3.2d, v1.2d, v2.2d -; CHECK-NEXT: cmgt v4.2d, v0.2d, v2.2d -; CHECK-NEXT: bif v1.16b, v2.16b, v3.16b -; CHECK-NEXT: bit v2.16b, v0.16b, v4.16b -; CHECK-NEXT: xtn v0.2s, v1.2d -; CHECK-NEXT: xtn2 v0.4s, v2.2d +; CHECK-NEXT: fcvtzs v0.4s, v0.4s ; CHECK-NEXT: ret entry: %conv = fptosi <4 x float> %x to <4 x i64> @@ -1170,69 +983,11 @@ } define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { -; CHECK-CVT-LABEL: stest_f16i32_mm: -; CHECK-CVT: // %bb.0: // %entry -; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-CVT-NEXT: mov h1, v0.h[2] -; CHECK-CVT-NEXT: mov h2, v0.h[1] -; CHECK-CVT-NEXT: fcvt s3, h0 -; CHECK-CVT-NEXT: mov h0, v0.h[3] -; CHECK-CVT-NEXT: mov w8, #2147483647 -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvtzs x9, s3 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvtzs x10, s1 -; CHECK-CVT-NEXT: fmov d1, x9 -; CHECK-CVT-NEXT: fcvtzs x9, s2 -; CHECK-CVT-NEXT: fmov d2, x10 -; CHECK-CVT-NEXT: fcvtzs x10, s0 -; CHECK-CVT-NEXT: mov v1.d[1], x9 -; CHECK-CVT-NEXT: dup v0.2d, x8 -; CHECK-CVT-NEXT: mov x8, #-2147483648 -; CHECK-CVT-NEXT: mov v2.d[1], x10 -; CHECK-CVT-NEXT: cmgt v3.2d, v0.2d, v1.2d -; CHECK-CVT-NEXT: cmgt v4.2d, v0.2d, v2.2d -; CHECK-CVT-NEXT: bif v1.16b, v0.16b, v3.16b -; CHECK-CVT-NEXT: bit v0.16b, v2.16b, v4.16b -; CHECK-CVT-NEXT: dup v2.2d, x8 -; CHECK-CVT-NEXT: cmgt v3.2d, v1.2d, v2.2d -; CHECK-CVT-NEXT: cmgt v4.2d, v0.2d, v2.2d -; CHECK-CVT-NEXT: bif v1.16b, v2.16b, v3.16b -; CHECK-CVT-NEXT: bit v2.16b, v0.16b, v4.16b -; CHECK-CVT-NEXT: xtn v0.2s, v1.2d -; CHECK-CVT-NEXT: xtn2 v0.4s, v2.2d -; CHECK-CVT-NEXT: ret -; -; CHECK-FP16-LABEL: stest_f16i32_mm: -; CHECK-FP16: // %bb.0: // %entry -; CHECK-FP16-NEXT: // kill: def 
$d0 killed $d0 def $q0 -; CHECK-FP16-NEXT: mov h1, v0.h[2] -; CHECK-FP16-NEXT: mov h2, v0.h[1] -; CHECK-FP16-NEXT: fcvtzs x9, h0 -; CHECK-FP16-NEXT: mov h0, v0.h[3] -; CHECK-FP16-NEXT: mov w8, #2147483647 -; CHECK-FP16-NEXT: fcvtzs x10, h1 -; CHECK-FP16-NEXT: fmov d1, x9 -; CHECK-FP16-NEXT: fcvtzs x9, h2 -; CHECK-FP16-NEXT: fmov d2, x10 -; CHECK-FP16-NEXT: fcvtzs x10, h0 -; CHECK-FP16-NEXT: mov v1.d[1], x9 -; CHECK-FP16-NEXT: dup v0.2d, x8 -; CHECK-FP16-NEXT: mov x8, #-2147483648 -; CHECK-FP16-NEXT: mov v2.d[1], x10 -; CHECK-FP16-NEXT: cmgt v3.2d, v0.2d, v1.2d -; CHECK-FP16-NEXT: cmgt v4.2d, v0.2d, v2.2d -; CHECK-FP16-NEXT: bif v1.16b, v0.16b, v3.16b -; CHECK-FP16-NEXT: bit v0.16b, v2.16b, v4.16b -; CHECK-FP16-NEXT: dup v2.2d, x8 -; CHECK-FP16-NEXT: cmgt v3.2d, v1.2d, v2.2d -; CHECK-FP16-NEXT: cmgt v4.2d, v0.2d, v2.2d -; CHECK-FP16-NEXT: bif v1.16b, v2.16b, v3.16b -; CHECK-FP16-NEXT: bit v2.16b, v0.16b, v4.16b -; CHECK-FP16-NEXT: xtn v0.2s, v1.2d -; CHECK-FP16-NEXT: xtn2 v0.4s, v2.2d -; CHECK-FP16-NEXT: ret +; CHECK-LABEL: stest_f16i32_mm: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: ret entry: %conv = fptosi <4 x half> %x to <4 x i64> %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> ) @@ -1467,19 +1222,74 @@ } define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { -; CHECK-LABEL: stest_f16i16_mm: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fcvtl2 v2.4s, v0.8h -; CHECK-NEXT: fcvtl v0.4s, v0.4h -; CHECK-NEXT: movi v1.4s, #127, msl #8 -; CHECK-NEXT: mvni v3.4s, #127, msl #8 -; CHECK-NEXT: fcvtzs v2.4s, v2.4s -; CHECK-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s -; CHECK-NEXT: sqxtn v0.4h, v0.4s -; CHECK-NEXT: smax v1.4s, v1.4s, v3.4s -; CHECK-NEXT: xtn2 v0.8h, v1.4s -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: stest_f16i16_mm: +; CHECK-CVT: // %bb.0: // %entry +; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-CVT-NEXT: mov w8, #32767 +; CHECK-CVT-NEXT: fcvtl 
v0.4s, v0.4h +; CHECK-CVT-NEXT: mov w10, #-32768 +; CHECK-CVT-NEXT: mov s2, v1.s[1] +; CHECK-CVT-NEXT: fcvtzs w11, s1 +; CHECK-CVT-NEXT: fcvtzs w15, s0 +; CHECK-CVT-NEXT: fcvtzs w9, s2 +; CHECK-CVT-NEXT: mov s2, v1.s[2] +; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w9, w9, w8, lt +; CHECK-CVT-NEXT: fcvtzs w12, s2 +; CHECK-CVT-NEXT: cmn w9, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov s2, v0.s[1] +; CHECK-CVT-NEXT: csel w9, w9, w10, gt +; CHECK-CVT-NEXT: cmp w11, w8 +; CHECK-CVT-NEXT: csel w11, w11, w8, lt +; CHECK-CVT-NEXT: fcvtzs w13, s1 +; CHECK-CVT-NEXT: cmn w11, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov s1, v0.s[2] +; CHECK-CVT-NEXT: csel w11, w11, w10, gt +; CHECK-CVT-NEXT: cmp w12, w8 +; CHECK-CVT-NEXT: csel w12, w12, w8, lt +; CHECK-CVT-NEXT: fcvtzs w14, s2 +; CHECK-CVT-NEXT: cmn w12, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: csel w12, w12, w10, gt +; CHECK-CVT-NEXT: cmp w13, w8 +; CHECK-CVT-NEXT: csel w13, w13, w8, lt +; CHECK-CVT-NEXT: fcvtzs w16, s1 +; CHECK-CVT-NEXT: cmn w13, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: csel w13, w13, w10, gt +; CHECK-CVT-NEXT: cmp w14, w8 +; CHECK-CVT-NEXT: csel w14, w14, w8, lt +; CHECK-CVT-NEXT: cmn w14, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: csel w14, w14, w10, gt +; CHECK-CVT-NEXT: cmp w15, w8 +; CHECK-CVT-NEXT: csel w15, w15, w8, lt +; CHECK-CVT-NEXT: cmn w15, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: csel w15, w15, w10, gt +; CHECK-CVT-NEXT: cmp w16, w8 +; CHECK-CVT-NEXT: csel w11, w16, w8, lt +; CHECK-CVT-NEXT: cmn w11, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: fmov s1, w15 +; CHECK-CVT-NEXT: fcvtzs w15, s0 +; CHECK-CVT-NEXT: csel w11, w11, w10, gt +; CHECK-CVT-NEXT: mov v2.s[1], w9 +; CHECK-CVT-NEXT: mov v1.s[1], w14 +; CHECK-CVT-NEXT: cmp w15, w8 +; CHECK-CVT-NEXT: csel w8, w15, w8, lt +; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: csel w8, w8, w10, gt +; CHECK-CVT-NEXT: mov v1.s[2], 
w11 +; CHECK-CVT-NEXT: mov v2.s[2], w12 +; CHECK-CVT-NEXT: mov v1.s[3], w8 +; CHECK-CVT-NEXT: mov v2.s[3], w13 +; CHECK-CVT-NEXT: xtn v0.4h, v1.4s +; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: stest_f16i16_mm: +; CHECK-FP16: // %bb.0: // %entry +; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: ret entry: %conv = fptosi <8 x half> %x to <8 x i32> %spec.store.select = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> ) @@ -1536,53 +1346,7 @@ define <2 x i64> @stest_f64i64_mm(<2 x double> %x) { ; CHECK-LABEL: stest_f64i64_mm: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: mov d0, v0.d[1] -; CHECK-NEXT: bl __fixdfti -; CHECK-NEXT: mov x8, #9223372036854775807 -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: cmp x0, x8 -; CHECK-NEXT: csel x9, x0, x8, lo -; CHECK-NEXT: cmp x1, #0 -; CHECK-NEXT: csel x10, x0, x8, lt -; CHECK-NEXT: csel x11, x1, xzr, lt -; CHECK-NEXT: csel x9, x9, x10, eq -; CHECK-NEXT: cmp x19, x8 -; CHECK-NEXT: csel x10, x19, x8, lo -; CHECK-NEXT: cmp x20, #0 -; CHECK-NEXT: csel x8, x19, x8, lt -; CHECK-NEXT: csel x12, x20, xzr, lt -; CHECK-NEXT: csel x8, x10, x8, eq -; CHECK-NEXT: cmp x12, #0 -; CHECK-NEXT: mov x10, #-9223372036854775808 -; CHECK-NEXT: csel x13, x8, x10, ge -; CHECK-NEXT: cmp x8, x10 -; CHECK-NEXT: csel x8, x8, x10, hi -; CHECK-NEXT: cmn x12, #1 -; CHECK-NEXT: csel x8, x8, x13, eq -; CHECK-NEXT: cmp x11, #0 -; CHECK-NEXT: csel x12, 
x9, x10, ge -; CHECK-NEXT: cmp x9, x10 -; CHECK-NEXT: csel x9, x9, x10, hi -; CHECK-NEXT: cmn x11, #1 -; CHECK-NEXT: csel x9, x9, x12, eq -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-NEXT: ret entry: %conv = fptosi <2 x double> %x to <2 x i128> @@ -1682,54 +1446,12 @@ define <2 x i64> @stest_f32i64_mm(<2 x float> %x) { ; CHECK-LABEL: stest_f32i64_mm: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w30, -32 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: mov x8, #9223372036854775807 -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: cmp x0, x8 -; CHECK-NEXT: csel x9, x0, x8, lo -; CHECK-NEXT: cmp x1, #0 -; CHECK-NEXT: csel x10, x0, x8, lt -; CHECK-NEXT: csel x9, x9, x10, eq -; CHECK-NEXT: csel x10, x1, xzr, lt -; CHECK-NEXT: cmp x19, x8 -; CHECK-NEXT: csel x11, x19, x8, lo -; CHECK-NEXT: cmp x20, #0 -; CHECK-NEXT: csel x8, x19, x8, lt -; CHECK-NEXT: csel x12, x20, xzr, lt -; CHECK-NEXT: csel x8, x11, x8, eq -; CHECK-NEXT: mov x11, #-9223372036854775808 -; CHECK-NEXT: cmp x8, x11 -; CHECK-NEXT: csel x13, x8, x11, hi -; CHECK-NEXT: cmp x12, #0 -; CHECK-NEXT: csel x8, x8, x11, ge -; CHECK-NEXT: cmn x12, #1 -; CHECK-NEXT: csel x8, x13, x8, eq -; CHECK-NEXT: cmp x9, x11 -; CHECK-NEXT: csel x12, 
x9, x11, hi -; CHECK-NEXT: cmp x10, #0 -; CHECK-NEXT: csel x9, x9, x11, ge -; CHECK-NEXT: cmn x10, #1 -; CHECK-NEXT: csel x9, x12, x9, eq +; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: fcvtzs x8, s0 ; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: add sp, sp, #48 +; CHECK-NEXT: fcvtzs x8, s1 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret entry: %conv = fptosi <2 x float> %x to <2 x i128> @@ -1829,57 +1551,27 @@ } define <2 x i64> @stest_f16i64_mm(<2 x half> %x) { -; CHECK-LABEL: stest_f16i64_mm: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 -; CHECK-NEXT: .cfi_offset w19, -8 -; CHECK-NEXT: .cfi_offset w20, -16 -; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill -; CHECK-NEXT: // kill: def $h0 killed $h0 killed $q0 -; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: mov x20, x1 -; CHECK-NEXT: mov h0, v0.h[1] -; CHECK-NEXT: bl __fixhfti -; CHECK-NEXT: mov x8, #9223372036854775807 -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: cmp x0, x8 -; CHECK-NEXT: csel x9, x0, x8, lo -; CHECK-NEXT: cmp x1, #0 -; CHECK-NEXT: csel x10, x0, x8, lt -; CHECK-NEXT: csel x9, x9, x10, eq -; CHECK-NEXT: csel x10, x1, xzr, lt -; CHECK-NEXT: cmp x19, x8 -; CHECK-NEXT: csel x11, x19, x8, lo -; CHECK-NEXT: cmp x20, #0 -; CHECK-NEXT: csel x8, x19, x8, lt -; CHECK-NEXT: csel x12, x20, xzr, lt -; CHECK-NEXT: csel x8, x11, x8, eq -; CHECK-NEXT: mov x11, #-9223372036854775808 -; CHECK-NEXT: cmp x8, x11 -; CHECK-NEXT: csel x13, x8, x11, hi -; CHECK-NEXT: cmp x12, #0 -; CHECK-NEXT: csel x8, x8, x11, ge -; CHECK-NEXT: cmn x12, #1 -; 
CHECK-NEXT: csel x8, x13, x8, eq -; CHECK-NEXT: cmp x9, x11 -; CHECK-NEXT: csel x12, x9, x11, hi -; CHECK-NEXT: cmp x10, #0 -; CHECK-NEXT: csel x9, x9, x11, ge -; CHECK-NEXT: cmn x10, #1 -; CHECK-NEXT: csel x9, x12, x9, eq -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fmov d1, x9 -; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: add sp, sp, #48 -; CHECK-NEXT: ret +; CHECK-CVT-LABEL: stest_f16i64_mm: +; CHECK-CVT: // %bb.0: // %entry +; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-CVT-NEXT: mov h1, v0.h[1] +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvtzs x8, s0 +; CHECK-CVT-NEXT: fmov d0, x8 +; CHECK-CVT-NEXT: fcvtzs x8, s1 +; CHECK-CVT-NEXT: mov v0.d[1], x8 +; CHECK-CVT-NEXT: ret +; +; CHECK-FP16-LABEL: stest_f16i64_mm: +; CHECK-FP16: // %bb.0: // %entry +; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: fcvtzs x8, h0 +; CHECK-FP16-NEXT: fmov d0, x8 +; CHECK-FP16-NEXT: fcvtzs x8, h1 +; CHECK-FP16-NEXT: mov v0.d[1], x8 +; CHECK-FP16-NEXT: ret entry: %conv = fptosi <2 x half> %x to <2 x i128> %spec.store.select = call <2 x i128> @llvm.smin.v2i128(<2 x i128> %conv, <2 x i128> ) Index: llvm/test/CodeGen/ARM/fpclamptosat.ll =================================================================== --- llvm/test/CodeGen/ARM/fpclamptosat.ll +++ llvm/test/CodeGen/ARM/fpclamptosat.ll @@ -77,25 +77,9 @@ ; ; FULL-LABEL: stest_f64i32: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: vmov r0, r1, d0 -; FULL-NEXT: bl __aeabi_d2lz -; FULL-NEXT: mvn r12, #-2147483648 -; FULL-NEXT: subs.w r3, r0, r12 -; FULL-NEXT: mov.w r2, #0 -; FULL-NEXT: sbcs r3, r1, #0 -; FULL-NEXT: it lt -; FULL-NEXT: movlt r2, #1 -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r0, r0, r12, ne -; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: rsbs.w r3, r0, #-2147483648 -; FULL-NEXT: mov.w r2, #-1 -; 
FULL-NEXT: sbcs.w r1, r2, r1 -; FULL-NEXT: it ge -; FULL-NEXT: movge.w r0, #-2147483648 -; FULL-NEXT: pop {r7, pc} +; FULL-NEXT: vcvt.s32.f64 s0, d0 +; FULL-NEXT: vmov r0, s0 +; FULL-NEXT: bx lr entry: %conv = fptosi double %x to i64 %0 = icmp slt i64 %conv, 2147483647 @@ -292,51 +276,11 @@ ; SOFT-NEXT: .LCPI3_0: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; -; VFP2-LABEL: stest_f32i32: -; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r7, lr} -; VFP2-NEXT: push {r7, lr} -; VFP2-NEXT: vmov r0, s0 -; VFP2-NEXT: bl __aeabi_f2lz -; VFP2-NEXT: mvn r2, #-2147483648 -; VFP2-NEXT: subs r3, r0, r2 -; VFP2-NEXT: mov.w r12, #0 -; VFP2-NEXT: sbcs r3, r1, #0 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt.w r12, #1 -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itt ne -; VFP2-NEXT: movne r12, r1 -; VFP2-NEXT: movne r2, r0 -; VFP2-NEXT: mov.w r0, #-1 -; VFP2-NEXT: rsbs.w r1, r2, #-2147483648 -; VFP2-NEXT: sbcs.w r0, r0, r12 -; VFP2-NEXT: it ge -; VFP2-NEXT: movge.w r2, #-2147483648 -; VFP2-NEXT: mov r0, r2 -; VFP2-NEXT: pop {r7, pc} -; -; FULL-LABEL: stest_f32i32: -; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: vmov r0, s0 -; FULL-NEXT: bl __aeabi_f2lz -; FULL-NEXT: mvn r12, #-2147483648 -; FULL-NEXT: subs.w r3, r0, r12 -; FULL-NEXT: mov.w r2, #0 -; FULL-NEXT: sbcs r3, r1, #0 -; FULL-NEXT: it lt -; FULL-NEXT: movlt r2, #1 -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r0, r0, r12, ne -; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: rsbs.w r3, r0, #-2147483648 -; FULL-NEXT: mov.w r2, #-1 -; FULL-NEXT: sbcs.w r1, r2, r1 -; FULL-NEXT: it ge -; FULL-NEXT: movge.w r0, #-2147483648 -; FULL-NEXT: pop {r7, pc} +; VFP-LABEL: stest_f32i32: +; VFP: @ %bb.0: @ %entry +; VFP-NEXT: vcvt.s32.f32 s0, s0 +; VFP-NEXT: vmov r0, s0 +; VFP-NEXT: bx lr entry: %conv = fptosi float %x to i64 %0 = icmp slt i64 %conv, 2147483647 @@ -541,47 +485,16 @@ ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: bl __aeabi_f2lz -; VFP2-NEXT: 
mvn r2, #-2147483648 -; VFP2-NEXT: subs r3, r0, r2 -; VFP2-NEXT: mov.w r12, #0 -; VFP2-NEXT: sbcs r3, r1, #0 -; VFP2-NEXT: it lt -; VFP2-NEXT: movlt.w r12, #1 -; VFP2-NEXT: cmp.w r12, #0 -; VFP2-NEXT: itt ne -; VFP2-NEXT: movne r12, r1 -; VFP2-NEXT: movne r2, r0 -; VFP2-NEXT: mov.w r0, #-1 -; VFP2-NEXT: rsbs.w r1, r2, #-2147483648 -; VFP2-NEXT: sbcs.w r0, r0, r12 -; VFP2-NEXT: it ge -; VFP2-NEXT: movge.w r2, #-2147483648 -; VFP2-NEXT: mov r0, r2 +; VFP2-NEXT: vmov s0, r0 +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: stest_f16i32: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: vmov.f16 r0, s0 -; FULL-NEXT: vmov s0, r0 -; FULL-NEXT: bl __fixhfdi -; FULL-NEXT: mvn r12, #-2147483648 -; FULL-NEXT: subs.w r3, r0, r12 -; FULL-NEXT: mov.w r2, #0 -; FULL-NEXT: sbcs r3, r1, #0 -; FULL-NEXT: it lt -; FULL-NEXT: movlt r2, #1 -; FULL-NEXT: cmp r2, #0 -; FULL-NEXT: csel r0, r0, r12, ne -; FULL-NEXT: csel r1, r1, r2, ne -; FULL-NEXT: rsbs.w r3, r0, #-2147483648 -; FULL-NEXT: mov.w r2, #-1 -; FULL-NEXT: sbcs.w r1, r2, r1 -; FULL-NEXT: it ge -; FULL-NEXT: movge.w r0, #-2147483648 -; FULL-NEXT: pop {r7, pc} +; FULL-NEXT: vcvt.s32.f16 s0, s0 +; FULL-NEXT: vmov r0, s0 +; FULL-NEXT: bx lr entry: %conv = fptosi half %x to i64 %0 = icmp slt i64 %conv, 2147483647 @@ -2429,26 +2342,9 @@ ; ; FULL-LABEL: stest_f64i32_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: vmov r0, r1, d0 -; FULL-NEXT: bl __aeabi_d2lz -; FULL-NEXT: mvn r2, #-2147483648 -; FULL-NEXT: cmp r0, r2 -; FULL-NEXT: csel r3, r0, r2, lo -; FULL-NEXT: cmp r1, #0 -; FULL-NEXT: csel r0, r0, r2, mi -; FULL-NEXT: it pl -; FULL-NEXT: movpl r1, #0 -; FULL-NEXT: csel r0, r3, r0, eq -; FULL-NEXT: mov.w r2, #-2147483648 -; FULL-NEXT: cmp.w r1, #-1 -; FULL-NEXT: csel r3, r0, r2, gt -; FULL-NEXT: cmp.w r0, #-2147483648 -; FULL-NEXT: csel r0, r0, r2, hi -; FULL-NEXT: adds r1, #1 -; 
FULL-NEXT: csel r0, r0, r3, eq -; FULL-NEXT: pop {r7, pc} +; FULL-NEXT: vcvt.s32.f64 s0, d0 +; FULL-NEXT: vmov r0, s0 +; FULL-NEXT: bx lr entry: %conv = fptosi double %x to i64 %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647) @@ -2643,60 +2539,11 @@ ; SOFT-NEXT: .LCPI30_0: ; SOFT-NEXT: .long 2147483647 @ 0x7fffffff ; -; VFP2-LABEL: stest_f32i32_mm: -; VFP2: @ %bb.0: @ %entry -; VFP2-NEXT: .save {r7, lr} -; VFP2-NEXT: push {r7, lr} -; VFP2-NEXT: vmov r0, s0 -; VFP2-NEXT: bl __aeabi_f2lz -; VFP2-NEXT: mvn r2, #-2147483648 -; VFP2-NEXT: cmp r0, r2 -; VFP2-NEXT: mvn r3, #-2147483648 -; VFP2-NEXT: it lo -; VFP2-NEXT: movlo r3, r0 -; VFP2-NEXT: cmp r1, #0 -; VFP2-NEXT: it mi -; VFP2-NEXT: movmi r2, r0 -; VFP2-NEXT: mov.w r0, #-2147483648 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r2, r3 -; VFP2-NEXT: it pl -; VFP2-NEXT: movpl r1, #0 -; VFP2-NEXT: cmp.w r1, #-1 -; VFP2-NEXT: mov.w r3, #-2147483648 -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r3, r2 -; VFP2-NEXT: cmp.w r2, #-2147483648 -; VFP2-NEXT: it ls -; VFP2-NEXT: movls r2, r0 -; VFP2-NEXT: adds r0, r1, #1 -; VFP2-NEXT: it ne -; VFP2-NEXT: movne r2, r3 -; VFP2-NEXT: mov r0, r2 -; VFP2-NEXT: pop {r7, pc} -; -; FULL-LABEL: stest_f32i32_mm: -; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: vmov r0, s0 -; FULL-NEXT: bl __aeabi_f2lz -; FULL-NEXT: mvn r2, #-2147483648 -; FULL-NEXT: cmp r0, r2 -; FULL-NEXT: csel r3, r0, r2, lo -; FULL-NEXT: cmp r1, #0 -; FULL-NEXT: csel r0, r0, r2, mi -; FULL-NEXT: it pl -; FULL-NEXT: movpl r1, #0 -; FULL-NEXT: csel r0, r3, r0, eq -; FULL-NEXT: mov.w r2, #-2147483648 -; FULL-NEXT: cmp.w r1, #-1 -; FULL-NEXT: csel r3, r0, r2, gt -; FULL-NEXT: cmp.w r0, #-2147483648 -; FULL-NEXT: csel r0, r0, r2, hi -; FULL-NEXT: adds r1, #1 -; FULL-NEXT: csel r0, r0, r3, eq -; FULL-NEXT: pop {r7, pc} +; VFP-LABEL: stest_f32i32_mm: +; VFP: @ %bb.0: @ %entry +; VFP-NEXT: vcvt.s32.f32 s0, s0 +; VFP-NEXT: vmov r0, s0 +; VFP-NEXT: bx lr entry: %conv = 
fptosi float %x to i64 %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647) @@ -2895,56 +2742,16 @@ ; VFP2-NEXT: push {r7, lr} ; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: bl __aeabi_h2f -; VFP2-NEXT: bl __aeabi_f2lz -; VFP2-NEXT: mvn r2, #-2147483648 -; VFP2-NEXT: cmp r0, r2 -; VFP2-NEXT: mvn r3, #-2147483648 -; VFP2-NEXT: it lo -; VFP2-NEXT: movlo r3, r0 -; VFP2-NEXT: cmp r1, #0 -; VFP2-NEXT: it mi -; VFP2-NEXT: movmi r2, r0 -; VFP2-NEXT: mov.w r0, #-2147483648 -; VFP2-NEXT: it eq -; VFP2-NEXT: moveq r2, r3 -; VFP2-NEXT: it pl -; VFP2-NEXT: movpl r1, #0 -; VFP2-NEXT: cmp.w r1, #-1 -; VFP2-NEXT: mov.w r3, #-2147483648 -; VFP2-NEXT: it gt -; VFP2-NEXT: movgt r3, r2 -; VFP2-NEXT: cmp.w r2, #-2147483648 -; VFP2-NEXT: it ls -; VFP2-NEXT: movls r2, r0 -; VFP2-NEXT: adds r0, r1, #1 -; VFP2-NEXT: it ne -; VFP2-NEXT: movne r2, r3 -; VFP2-NEXT: mov r0, r2 +; VFP2-NEXT: vmov s0, r0 +; VFP2-NEXT: vcvt.s32.f32 s0, s0 +; VFP2-NEXT: vmov r0, s0 ; VFP2-NEXT: pop {r7, pc} ; ; FULL-LABEL: stest_f16i32_mm: ; FULL: @ %bb.0: @ %entry -; FULL-NEXT: .save {r7, lr} -; FULL-NEXT: push {r7, lr} -; FULL-NEXT: vmov.f16 r0, s0 -; FULL-NEXT: vmov s0, r0 -; FULL-NEXT: bl __fixhfdi -; FULL-NEXT: mvn r2, #-2147483648 -; FULL-NEXT: cmp r0, r2 -; FULL-NEXT: csel r3, r0, r2, lo -; FULL-NEXT: cmp r1, #0 -; FULL-NEXT: csel r0, r0, r2, mi -; FULL-NEXT: it pl -; FULL-NEXT: movpl r1, #0 -; FULL-NEXT: csel r0, r3, r0, eq -; FULL-NEXT: mov.w r2, #-2147483648 -; FULL-NEXT: cmp.w r1, #-1 -; FULL-NEXT: csel r3, r0, r2, gt -; FULL-NEXT: cmp.w r0, #-2147483648 -; FULL-NEXT: csel r0, r0, r2, hi -; FULL-NEXT: adds r1, #1 -; FULL-NEXT: csel r0, r0, r3, eq -; FULL-NEXT: pop {r7, pc} +; FULL-NEXT: vcvt.s32.f16 s0, s0 +; FULL-NEXT: vmov r0, s0 +; FULL-NEXT: bx lr entry: %conv = fptosi half %x to i64 %spec.store.select = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647) Index: llvm/test/CodeGen/RISCV/fpclamptosat.ll =================================================================== --- 
llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -11,42 +11,42 @@ ; i32 saturate define i32 @stest_f64i32(double %x) { -; RV32-LABEL: stest_f64i32: -; RV32: # %bb.0: # %entry -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: call __fixdfdi@plt -; RV32-NEXT: lui a2, 524288 -; RV32-NEXT: addi a3, a2, -1 -; RV32-NEXT: beqz a1, .LBB0_2 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slti a4, a1, 0 -; RV32-NEXT: beqz a4, .LBB0_3 -; RV32-NEXT: j .LBB0_4 -; RV32-NEXT: .LBB0_2: -; RV32-NEXT: sltu a4, a0, a3 -; RV32-NEXT: bnez a4, .LBB0_4 -; RV32-NEXT: .LBB0_3: # %entry -; RV32-NEXT: mv a1, zero -; RV32-NEXT: mv a0, a3 -; RV32-NEXT: .LBB0_4: # %entry -; RV32-NEXT: addi a3, zero, -1 -; RV32-NEXT: beq a1, a3, .LBB0_6 -; RV32-NEXT: # %bb.5: # %entry -; RV32-NEXT: slt a1, a3, a1 -; RV32-NEXT: beqz a1, .LBB0_7 -; RV32-NEXT: j .LBB0_8 -; RV32-NEXT: .LBB0_6: -; RV32-NEXT: sltu a1, a2, a0 -; RV32-NEXT: bnez a1, .LBB0_8 -; RV32-NEXT: .LBB0_7: # %entry -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: .LBB0_8: # %entry -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret +; RV32IF-LABEL: stest_f64i32: +; RV32IF: # %bb.0: # %entry +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: .cfi_def_cfa_offset 16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: .cfi_offset ra, -4 +; RV32IF-NEXT: call __fixdfdi@plt +; RV32IF-NEXT: lui a2, 524288 +; RV32IF-NEXT: addi a3, a2, -1 +; RV32IF-NEXT: beqz a1, .LBB0_2 +; RV32IF-NEXT: # %bb.1: # %entry +; RV32IF-NEXT: slti a4, a1, 0 +; RV32IF-NEXT: beqz a4, .LBB0_3 +; RV32IF-NEXT: j .LBB0_4 +; RV32IF-NEXT: .LBB0_2: +; RV32IF-NEXT: sltu a4, a0, a3 +; RV32IF-NEXT: bnez a4, .LBB0_4 +; RV32IF-NEXT: .LBB0_3: # %entry +; RV32IF-NEXT: mv a1, zero +; RV32IF-NEXT: mv a0, a3 +; RV32IF-NEXT: .LBB0_4: # %entry +; RV32IF-NEXT: addi a3, zero, -1 +; RV32IF-NEXT: beq 
a1, a3, .LBB0_6 +; RV32IF-NEXT: # %bb.5: # %entry +; RV32IF-NEXT: slt a1, a3, a1 +; RV32IF-NEXT: beqz a1, .LBB0_7 +; RV32IF-NEXT: j .LBB0_8 +; RV32IF-NEXT: .LBB0_6: +; RV32IF-NEXT: sltu a1, a2, a0 +; RV32IF-NEXT: bnez a1, .LBB0_8 +; RV32IF-NEXT: .LBB0_7: # %entry +; RV32IF-NEXT: lui a0, 524288 +; RV32IF-NEXT: .LBB0_8: # %entry +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret ; ; RV64IF-LABEL: stest_f64i32: ; RV64IF: # %bb.0: # %entry @@ -69,6 +69,24 @@ ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret ; +; RV32IFD-LABEL: stest_f64i32: +; RV32IFD: # %bb.0: # %entry +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: .cfi_def_cfa_offset 16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: feq.d a0, ft0, ft0 +; RV32IFD-NEXT: bnez a0, .LBB0_2 +; RV32IFD-NEXT: # %bb.1: # %entry +; RV32IFD-NEXT: mv a0, zero +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB0_2: +; RV32IFD-NEXT: fcvt.w.d a0, ft0, rtz +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; ; RV64IFD-LABEL: stest_f64i32: ; RV64IFD: # %bb.0: # %entry ; RV64IFD-NEXT: fmv.d.x ft0, a0 @@ -243,39 +261,14 @@ define i32 @stest_f32i32(float %x) { ; RV32-LABEL: stest_f32i32: ; RV32: # %bb.0: # %entry -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: call __fixsfdi@plt -; RV32-NEXT: lui a2, 524288 -; RV32-NEXT: addi a3, a2, -1 -; RV32-NEXT: beqz a1, .LBB3_2 +; RV32-NEXT: fmv.w.x ft0, a0 +; RV32-NEXT: feq.s a0, ft0, ft0 +; RV32-NEXT: bnez a0, .LBB3_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slti a4, a1, 0 -; RV32-NEXT: beqz a4, .LBB3_3 -; RV32-NEXT: j .LBB3_4 +; RV32-NEXT: mv a0, zero +; RV32-NEXT: ret ; RV32-NEXT: .LBB3_2: -; RV32-NEXT: sltu a4, a0, a3 -; RV32-NEXT: bnez a4, .LBB3_4 -; RV32-NEXT: .LBB3_3: # %entry -; RV32-NEXT: mv a1, zero -; RV32-NEXT: mv a0, 
a3 -; RV32-NEXT: .LBB3_4: # %entry -; RV32-NEXT: addi a3, zero, -1 -; RV32-NEXT: beq a1, a3, .LBB3_6 -; RV32-NEXT: # %bb.5: # %entry -; RV32-NEXT: slt a1, a3, a1 -; RV32-NEXT: beqz a1, .LBB3_7 -; RV32-NEXT: j .LBB3_8 -; RV32-NEXT: .LBB3_6: -; RV32-NEXT: sltu a1, a2, a0 -; RV32-NEXT: bnez a1, .LBB3_8 -; RV32-NEXT: .LBB3_7: # %entry -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: .LBB3_8: # %entry -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: fcvt.w.s a0, ft0, rtz ; RV32-NEXT: ret ; ; RV64-LABEL: stest_f32i32: @@ -1222,42 +1215,54 @@ ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; -; RV64-LABEL: stest_f64i64: -; RV64: # %bb.0: # %entry -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: call __fixdfti@plt -; RV64-NEXT: addi a2, zero, -1 -; RV64-NEXT: srli a3, a2, 1 -; RV64-NEXT: beqz a1, .LBB18_2 -; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: slti a4, a1, 0 -; RV64-NEXT: beqz a4, .LBB18_3 -; RV64-NEXT: j .LBB18_4 -; RV64-NEXT: .LBB18_2: -; RV64-NEXT: sltu a4, a0, a3 -; RV64-NEXT: bnez a4, .LBB18_4 -; RV64-NEXT: .LBB18_3: # %entry -; RV64-NEXT: mv a1, zero -; RV64-NEXT: mv a0, a3 -; RV64-NEXT: .LBB18_4: # %entry -; RV64-NEXT: slli a3, a2, 63 -; RV64-NEXT: beq a1, a2, .LBB18_6 -; RV64-NEXT: # %bb.5: # %entry -; RV64-NEXT: slt a1, a2, a1 -; RV64-NEXT: beqz a1, .LBB18_7 -; RV64-NEXT: j .LBB18_8 -; RV64-NEXT: .LBB18_6: -; RV64-NEXT: sltu a1, a3, a0 -; RV64-NEXT: bnez a1, .LBB18_8 -; RV64-NEXT: .LBB18_7: # %entry -; RV64-NEXT: mv a0, a3 -; RV64-NEXT: .LBB18_8: # %entry -; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: ret +; RV64IF-LABEL: stest_f64i64: +; RV64IF: # %bb.0: # %entry +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: .cfi_def_cfa_offset 16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: .cfi_offset ra, -8 +; RV64IF-NEXT: call __fixdfti@plt +; 
RV64IF-NEXT: addi a2, zero, -1 +; RV64IF-NEXT: srli a3, a2, 1 +; RV64IF-NEXT: beqz a1, .LBB18_2 +; RV64IF-NEXT: # %bb.1: # %entry +; RV64IF-NEXT: slti a4, a1, 0 +; RV64IF-NEXT: beqz a4, .LBB18_3 +; RV64IF-NEXT: j .LBB18_4 +; RV64IF-NEXT: .LBB18_2: +; RV64IF-NEXT: sltu a4, a0, a3 +; RV64IF-NEXT: bnez a4, .LBB18_4 +; RV64IF-NEXT: .LBB18_3: # %entry +; RV64IF-NEXT: mv a1, zero +; RV64IF-NEXT: mv a0, a3 +; RV64IF-NEXT: .LBB18_4: # %entry +; RV64IF-NEXT: slli a3, a2, 63 +; RV64IF-NEXT: beq a1, a2, .LBB18_6 +; RV64IF-NEXT: # %bb.5: # %entry +; RV64IF-NEXT: slt a1, a2, a1 +; RV64IF-NEXT: beqz a1, .LBB18_7 +; RV64IF-NEXT: j .LBB18_8 +; RV64IF-NEXT: .LBB18_6: +; RV64IF-NEXT: sltu a1, a3, a0 +; RV64IF-NEXT: bnez a1, .LBB18_8 +; RV64IF-NEXT: .LBB18_7: # %entry +; RV64IF-NEXT: mv a0, a3 +; RV64IF-NEXT: .LBB18_8: # %entry +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; +; RV64IFD-LABEL: stest_f64i64: +; RV64IFD: # %bb.0: # %entry +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: feq.d a0, ft0, ft0 +; RV64IFD-NEXT: bnez a0, .LBB18_2 +; RV64IFD-NEXT: # %bb.1: # %entry +; RV64IFD-NEXT: mv a0, zero +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: .LBB18_2: +; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz +; RV64IFD-NEXT: ret entry: %conv = fptosi double %x to i128 %0 = icmp slt i128 %conv, 9223372036854775807 @@ -1494,39 +1499,14 @@ ; ; RV64-LABEL: stest_f32i64: ; RV64: # %bb.0: # %entry -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: call __fixsfti@plt -; RV64-NEXT: addi a2, zero, -1 -; RV64-NEXT: srli a3, a2, 1 -; RV64-NEXT: beqz a1, .LBB21_2 +; RV64-NEXT: fmv.w.x ft0, a0 +; RV64-NEXT: feq.s a0, ft0, ft0 +; RV64-NEXT: bnez a0, .LBB21_2 ; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: slti a4, a1, 0 -; RV64-NEXT: beqz a4, .LBB21_3 -; RV64-NEXT: j .LBB21_4 +; RV64-NEXT: mv a0, zero +; RV64-NEXT: ret ; RV64-NEXT: .LBB21_2: -; 
RV64-NEXT: sltu a4, a0, a3 -; RV64-NEXT: bnez a4, .LBB21_4 -; RV64-NEXT: .LBB21_3: # %entry -; RV64-NEXT: mv a1, zero -; RV64-NEXT: mv a0, a3 -; RV64-NEXT: .LBB21_4: # %entry -; RV64-NEXT: slli a3, a2, 63 -; RV64-NEXT: beq a1, a2, .LBB21_6 -; RV64-NEXT: # %bb.5: # %entry -; RV64-NEXT: slt a1, a2, a1 -; RV64-NEXT: beqz a1, .LBB21_7 -; RV64-NEXT: j .LBB21_8 -; RV64-NEXT: .LBB21_6: -; RV64-NEXT: sltu a1, a3, a0 -; RV64-NEXT: bnez a1, .LBB21_8 -; RV64-NEXT: .LBB21_7: # %entry -; RV64-NEXT: mv a0, a3 -; RV64-NEXT: .LBB21_8: # %entry -; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: fcvt.l.s a0, ft0, rtz ; RV64-NEXT: ret entry: %conv = fptosi float %x to i128 @@ -1979,58 +1959,58 @@ ; i32 saturate define i32 @stest_f64i32_mm(double %x) { -; RV32-LABEL: stest_f64i32_mm: -; RV32: # %bb.0: # %entry -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: call __fixdfdi@plt -; RV32-NEXT: lui a2, 524288 -; RV32-NEXT: addi a4, a2, -1 -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: bgez a1, .LBB27_9 -; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: bgeu a0, a4, .LBB27_10 -; RV32-NEXT: .LBB27_2: # %entry -; RV32-NEXT: bnez a1, .LBB27_11 -; RV32-NEXT: .LBB27_3: # %entry -; RV32-NEXT: bgez a1, .LBB27_12 -; RV32-NEXT: .LBB27_4: # %entry -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: bltz a1, .LBB27_13 -; RV32-NEXT: .LBB27_5: # %entry -; RV32-NEXT: bgeu a2, a0, .LBB27_14 -; RV32-NEXT: .LBB27_6: # %entry -; RV32-NEXT: addi a2, zero, -1 -; RV32-NEXT: beq a1, a2, .LBB27_8 -; RV32-NEXT: .LBB27_7: # %entry -; RV32-NEXT: mv a0, a3 -; RV32-NEXT: .LBB27_8: # %entry -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 -; RV32-NEXT: ret -; RV32-NEXT: .LBB27_9: # %entry -; RV32-NEXT: mv a3, a4 -; RV32-NEXT: bltu a0, a4, .LBB27_2 -; RV32-NEXT: .LBB27_10: # %entry -; RV32-NEXT: mv a0, a4 -; RV32-NEXT: beqz a1, .LBB27_3 -; RV32-NEXT: 
.LBB27_11: # %entry -; RV32-NEXT: mv a0, a3 -; RV32-NEXT: bltz a1, .LBB27_4 -; RV32-NEXT: .LBB27_12: # %entry -; RV32-NEXT: mv a1, zero -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: bgez a1, .LBB27_5 -; RV32-NEXT: .LBB27_13: # %entry -; RV32-NEXT: lui a3, 524288 -; RV32-NEXT: bltu a2, a0, .LBB27_6 -; RV32-NEXT: .LBB27_14: # %entry -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: addi a2, zero, -1 -; RV32-NEXT: bne a1, a2, .LBB27_7 -; RV32-NEXT: j .LBB27_8 +; RV32IF-LABEL: stest_f64i32_mm: +; RV32IF: # %bb.0: # %entry +; RV32IF-NEXT: addi sp, sp, -16 +; RV32IF-NEXT: .cfi_def_cfa_offset 16 +; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32IF-NEXT: .cfi_offset ra, -4 +; RV32IF-NEXT: call __fixdfdi@plt +; RV32IF-NEXT: lui a2, 524288 +; RV32IF-NEXT: addi a4, a2, -1 +; RV32IF-NEXT: mv a3, a0 +; RV32IF-NEXT: bgez a1, .LBB27_9 +; RV32IF-NEXT: # %bb.1: # %entry +; RV32IF-NEXT: bgeu a0, a4, .LBB27_10 +; RV32IF-NEXT: .LBB27_2: # %entry +; RV32IF-NEXT: bnez a1, .LBB27_11 +; RV32IF-NEXT: .LBB27_3: # %entry +; RV32IF-NEXT: bgez a1, .LBB27_12 +; RV32IF-NEXT: .LBB27_4: # %entry +; RV32IF-NEXT: mv a3, a0 +; RV32IF-NEXT: bltz a1, .LBB27_13 +; RV32IF-NEXT: .LBB27_5: # %entry +; RV32IF-NEXT: bgeu a2, a0, .LBB27_14 +; RV32IF-NEXT: .LBB27_6: # %entry +; RV32IF-NEXT: addi a2, zero, -1 +; RV32IF-NEXT: beq a1, a2, .LBB27_8 +; RV32IF-NEXT: .LBB27_7: # %entry +; RV32IF-NEXT: mv a0, a3 +; RV32IF-NEXT: .LBB27_8: # %entry +; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32IF-NEXT: addi sp, sp, 16 +; RV32IF-NEXT: ret +; RV32IF-NEXT: .LBB27_9: # %entry +; RV32IF-NEXT: mv a3, a4 +; RV32IF-NEXT: bltu a0, a4, .LBB27_2 +; RV32IF-NEXT: .LBB27_10: # %entry +; RV32IF-NEXT: mv a0, a4 +; RV32IF-NEXT: beqz a1, .LBB27_3 +; RV32IF-NEXT: .LBB27_11: # %entry +; RV32IF-NEXT: mv a0, a3 +; RV32IF-NEXT: bltz a1, .LBB27_4 +; RV32IF-NEXT: .LBB27_12: # %entry +; RV32IF-NEXT: mv a1, zero +; RV32IF-NEXT: mv a3, a0 +; RV32IF-NEXT: bgez a1, .LBB27_5 +; RV32IF-NEXT: .LBB27_13: # %entry +; RV32IF-NEXT: lui a3, 524288 
+; RV32IF-NEXT: bltu a2, a0, .LBB27_6 +; RV32IF-NEXT: .LBB27_14: # %entry +; RV32IF-NEXT: lui a0, 524288 +; RV32IF-NEXT: addi a2, zero, -1 +; RV32IF-NEXT: bne a1, a2, .LBB27_7 +; RV32IF-NEXT: j .LBB27_8 ; ; RV64IF-LABEL: stest_f64i32_mm: ; RV64IF: # %bb.0: # %entry @@ -2053,6 +2033,24 @@ ; RV64IF-NEXT: addi sp, sp, 16 ; RV64IF-NEXT: ret ; +; RV32IFD-LABEL: stest_f64i32_mm: +; RV32IFD: # %bb.0: # %entry +; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: .cfi_def_cfa_offset 16 +; RV32IFD-NEXT: sw a0, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) +; RV32IFD-NEXT: fld ft0, 8(sp) +; RV32IFD-NEXT: feq.d a0, ft0, ft0 +; RV32IFD-NEXT: bnez a0, .LBB27_2 +; RV32IFD-NEXT: # %bb.1: # %entry +; RV32IFD-NEXT: mv a0, zero +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; RV32IFD-NEXT: .LBB27_2: +; RV32IFD-NEXT: fcvt.w.d a0, ft0, rtz +; RV32IFD-NEXT: addi sp, sp, 16 +; RV32IFD-NEXT: ret +; ; RV64IFD-LABEL: stest_f64i32_mm: ; RV64IFD: # %bb.0: # %entry ; RV64IFD-NEXT: fmv.d.x ft0, a0 @@ -2218,56 +2216,15 @@ define i32 @stest_f32i32_mm(float %x) { ; RV32-LABEL: stest_f32i32_mm: ; RV32: # %bb.0: # %entry -; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: .cfi_offset ra, -4 -; RV32-NEXT: call __fixsfdi@plt -; RV32-NEXT: lui a2, 524288 -; RV32-NEXT: addi a4, a2, -1 -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: bgez a1, .LBB30_9 +; RV32-NEXT: fmv.w.x ft0, a0 +; RV32-NEXT: feq.s a0, ft0, ft0 +; RV32-NEXT: bnez a0, .LBB30_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: bgeu a0, a4, .LBB30_10 -; RV32-NEXT: .LBB30_2: # %entry -; RV32-NEXT: bnez a1, .LBB30_11 -; RV32-NEXT: .LBB30_3: # %entry -; RV32-NEXT: bgez a1, .LBB30_12 -; RV32-NEXT: .LBB30_4: # %entry -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: bltz a1, .LBB30_13 -; RV32-NEXT: .LBB30_5: # %entry -; RV32-NEXT: bgeu a2, a0, .LBB30_14 -; RV32-NEXT: .LBB30_6: # %entry -; RV32-NEXT: addi a2, zero, -1 -; RV32-NEXT: beq a1, a2, .LBB30_8 -; RV32-NEXT: .LBB30_7: # %entry -; 
RV32-NEXT: mv a0, a3 -; RV32-NEXT: .LBB30_8: # %entry -; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: mv a0, zero +; RV32-NEXT: ret +; RV32-NEXT: .LBB30_2: +; RV32-NEXT: fcvt.w.s a0, ft0, rtz ; RV32-NEXT: ret -; RV32-NEXT: .LBB30_9: # %entry -; RV32-NEXT: mv a3, a4 -; RV32-NEXT: bltu a0, a4, .LBB30_2 -; RV32-NEXT: .LBB30_10: # %entry -; RV32-NEXT: mv a0, a4 -; RV32-NEXT: beqz a1, .LBB30_3 -; RV32-NEXT: .LBB30_11: # %entry -; RV32-NEXT: mv a0, a3 -; RV32-NEXT: bltz a1, .LBB30_4 -; RV32-NEXT: .LBB30_12: # %entry -; RV32-NEXT: mv a1, zero -; RV32-NEXT: mv a3, a0 -; RV32-NEXT: bgez a1, .LBB30_5 -; RV32-NEXT: .LBB30_13: # %entry -; RV32-NEXT: lui a3, 524288 -; RV32-NEXT: bltu a2, a0, .LBB30_6 -; RV32-NEXT: .LBB30_14: # %entry -; RV32-NEXT: lui a0, 524288 -; RV32-NEXT: addi a2, zero, -1 -; RV32-NEXT: bne a1, a2, .LBB30_7 -; RV32-NEXT: j .LBB30_8 ; ; RV64-LABEL: stest_f32i32_mm: ; RV64: # %bb.0: # %entry @@ -3253,56 +3210,68 @@ ; RV32-NEXT: bne a2, a6, .LBB45_17 ; RV32-NEXT: j .LBB45_18 ; -; RV64-LABEL: stest_f64i64_mm: -; RV64: # %bb.0: # %entry -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: call __fixdfti@plt -; RV64-NEXT: addi a2, zero, -1 -; RV64-NEXT: srli a4, a2, 1 -; RV64-NEXT: mv a3, a0 -; RV64-NEXT: bgez a1, .LBB45_10 -; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: bgeu a0, a4, .LBB45_11 -; RV64-NEXT: .LBB45_2: # %entry -; RV64-NEXT: bnez a1, .LBB45_12 -; RV64-NEXT: .LBB45_3: # %entry -; RV64-NEXT: bltz a1, .LBB45_5 -; RV64-NEXT: .LBB45_4: # %entry -; RV64-NEXT: mv a1, zero -; RV64-NEXT: .LBB45_5: # %entry -; RV64-NEXT: slli a4, a2, 63 -; RV64-NEXT: mv a3, a0 -; RV64-NEXT: bltz a1, .LBB45_13 -; RV64-NEXT: # %bb.6: # %entry -; RV64-NEXT: bgeu a4, a0, .LBB45_14 -; RV64-NEXT: .LBB45_7: # %entry -; RV64-NEXT: beq a1, a2, .LBB45_9 -; RV64-NEXT: .LBB45_8: # %entry -; RV64-NEXT: mv a0, a3 -; RV64-NEXT: 
.LBB45_9: # %entry -; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: ret -; RV64-NEXT: .LBB45_10: # %entry -; RV64-NEXT: mv a3, a4 -; RV64-NEXT: bltu a0, a4, .LBB45_2 -; RV64-NEXT: .LBB45_11: # %entry -; RV64-NEXT: mv a0, a4 -; RV64-NEXT: beqz a1, .LBB45_3 -; RV64-NEXT: .LBB45_12: # %entry -; RV64-NEXT: mv a0, a3 -; RV64-NEXT: bgez a1, .LBB45_4 -; RV64-NEXT: j .LBB45_5 -; RV64-NEXT: .LBB45_13: # %entry -; RV64-NEXT: mv a3, a4 -; RV64-NEXT: bltu a4, a0, .LBB45_7 -; RV64-NEXT: .LBB45_14: # %entry -; RV64-NEXT: mv a0, a4 -; RV64-NEXT: bne a1, a2, .LBB45_8 -; RV64-NEXT: j .LBB45_9 +; RV64IF-LABEL: stest_f64i64_mm: +; RV64IF: # %bb.0: # %entry +; RV64IF-NEXT: addi sp, sp, -16 +; RV64IF-NEXT: .cfi_def_cfa_offset 16 +; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64IF-NEXT: .cfi_offset ra, -8 +; RV64IF-NEXT: call __fixdfti@plt +; RV64IF-NEXT: addi a2, zero, -1 +; RV64IF-NEXT: srli a4, a2, 1 +; RV64IF-NEXT: mv a3, a0 +; RV64IF-NEXT: bgez a1, .LBB45_10 +; RV64IF-NEXT: # %bb.1: # %entry +; RV64IF-NEXT: bgeu a0, a4, .LBB45_11 +; RV64IF-NEXT: .LBB45_2: # %entry +; RV64IF-NEXT: bnez a1, .LBB45_12 +; RV64IF-NEXT: .LBB45_3: # %entry +; RV64IF-NEXT: bltz a1, .LBB45_5 +; RV64IF-NEXT: .LBB45_4: # %entry +; RV64IF-NEXT: mv a1, zero +; RV64IF-NEXT: .LBB45_5: # %entry +; RV64IF-NEXT: slli a4, a2, 63 +; RV64IF-NEXT: mv a3, a0 +; RV64IF-NEXT: bltz a1, .LBB45_13 +; RV64IF-NEXT: # %bb.6: # %entry +; RV64IF-NEXT: bgeu a4, a0, .LBB45_14 +; RV64IF-NEXT: .LBB45_7: # %entry +; RV64IF-NEXT: beq a1, a2, .LBB45_9 +; RV64IF-NEXT: .LBB45_8: # %entry +; RV64IF-NEXT: mv a0, a3 +; RV64IF-NEXT: .LBB45_9: # %entry +; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64IF-NEXT: addi sp, sp, 16 +; RV64IF-NEXT: ret +; RV64IF-NEXT: .LBB45_10: # %entry +; RV64IF-NEXT: mv a3, a4 +; RV64IF-NEXT: bltu a0, a4, .LBB45_2 +; RV64IF-NEXT: .LBB45_11: # %entry +; RV64IF-NEXT: mv a0, a4 +; RV64IF-NEXT: beqz a1, .LBB45_3 +; RV64IF-NEXT: .LBB45_12: # %entry +; 
RV64IF-NEXT: mv a0, a3 +; RV64IF-NEXT: bgez a1, .LBB45_4 +; RV64IF-NEXT: j .LBB45_5 +; RV64IF-NEXT: .LBB45_13: # %entry +; RV64IF-NEXT: mv a3, a4 +; RV64IF-NEXT: bltu a4, a0, .LBB45_7 +; RV64IF-NEXT: .LBB45_14: # %entry +; RV64IF-NEXT: mv a0, a4 +; RV64IF-NEXT: bne a1, a2, .LBB45_8 +; RV64IF-NEXT: j .LBB45_9 +; +; RV64IFD-LABEL: stest_f64i64_mm: +; RV64IFD: # %bb.0: # %entry +; RV64IFD-NEXT: fmv.d.x ft0, a0 +; RV64IFD-NEXT: feq.d a0, ft0, ft0 +; RV64IFD-NEXT: bnez a0, .LBB45_2 +; RV64IFD-NEXT: # %bb.1: # %entry +; RV64IFD-NEXT: mv a0, zero +; RV64IFD-NEXT: ret +; RV64IFD-NEXT: .LBB45_2: +; RV64IFD-NEXT: fcvt.l.d a0, ft0, rtz +; RV64IFD-NEXT: ret entry: %conv = fptosi double %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807) @@ -3667,54 +3636,15 @@ ; ; RV64-LABEL: stest_f32i64_mm: ; RV64: # %bb.0: # %entry -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64-NEXT: .cfi_offset ra, -8 -; RV64-NEXT: call __fixsfti@plt -; RV64-NEXT: addi a2, zero, -1 -; RV64-NEXT: srli a4, a2, 1 -; RV64-NEXT: mv a3, a0 -; RV64-NEXT: bgez a1, .LBB48_10 +; RV64-NEXT: fmv.w.x ft0, a0 +; RV64-NEXT: feq.s a0, ft0, ft0 +; RV64-NEXT: bnez a0, .LBB48_2 ; RV64-NEXT: # %bb.1: # %entry -; RV64-NEXT: bgeu a0, a4, .LBB48_11 -; RV64-NEXT: .LBB48_2: # %entry -; RV64-NEXT: bnez a1, .LBB48_12 -; RV64-NEXT: .LBB48_3: # %entry -; RV64-NEXT: bltz a1, .LBB48_5 -; RV64-NEXT: .LBB48_4: # %entry -; RV64-NEXT: mv a1, zero -; RV64-NEXT: .LBB48_5: # %entry -; RV64-NEXT: slli a4, a2, 63 -; RV64-NEXT: mv a3, a0 -; RV64-NEXT: bltz a1, .LBB48_13 -; RV64-NEXT: # %bb.6: # %entry -; RV64-NEXT: bgeu a4, a0, .LBB48_14 -; RV64-NEXT: .LBB48_7: # %entry -; RV64-NEXT: beq a1, a2, .LBB48_9 -; RV64-NEXT: .LBB48_8: # %entry -; RV64-NEXT: mv a0, a3 -; RV64-NEXT: .LBB48_9: # %entry -; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload -; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: mv a0, zero +; RV64-NEXT: ret +; 
RV64-NEXT: .LBB48_2: +; RV64-NEXT: fcvt.l.s a0, ft0, rtz ; RV64-NEXT: ret -; RV64-NEXT: .LBB48_10: # %entry -; RV64-NEXT: mv a3, a4 -; RV64-NEXT: bltu a0, a4, .LBB48_2 -; RV64-NEXT: .LBB48_11: # %entry -; RV64-NEXT: mv a0, a4 -; RV64-NEXT: beqz a1, .LBB48_3 -; RV64-NEXT: .LBB48_12: # %entry -; RV64-NEXT: mv a0, a3 -; RV64-NEXT: bgez a1, .LBB48_4 -; RV64-NEXT: j .LBB48_5 -; RV64-NEXT: .LBB48_13: # %entry -; RV64-NEXT: mv a3, a4 -; RV64-NEXT: bltu a4, a0, .LBB48_7 -; RV64-NEXT: .LBB48_14: # %entry -; RV64-NEXT: mv a0, a4 -; RV64-NEXT: bne a1, a2, .LBB48_8 -; RV64-NEXT: j .LBB48_9 entry: %conv = fptosi float %x to i128 %spec.store.select = call i128 @llvm.smin.i128(i128 %conv, i128 9223372036854775807) Index: llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll +++ llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll @@ -205,141 +205,8 @@ define arm_aapcs_vfpcc <4 x i32> @stest_f32i32(<4 x float> %x) { ; CHECK-LABEL: stest_f32i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r6, d8 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r9, r0 -; CHECK-NEXT: mvn r0, #-2147483648 -; CHECK-NEXT: subs.w r0, r9, r0 -; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: mvn r4, #-2147483648 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: mov r0, r6 -; CHECK-NEXT: csetm r11, ne -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: subs r0, r0, r4 -; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: mov r10, r1 -; CHECK-NEXT: mov.w r0, #0 -; 
CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: vmov r4, r0, d9 -; CHECK-NEXT: csetm r8, ne -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r5, r0 -; CHECK-NEXT: mov r0, r4 -; CHECK-NEXT: mov r7, r1 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: adr r2, .LCPI3_0 -; CHECK-NEXT: mvn r4, #-2147483648 -; CHECK-NEXT: vldrw.u32 q0, [r2] -; CHECK-NEXT: adr r2, .LCPI3_1 -; CHECK-NEXT: vldrw.u32 q2, [r2] -; CHECK-NEXT: subs r2, r5, r4 -; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: vmov q1[2], q1[0], r9, r6 -; CHECK-NEXT: vmov q4[2], q4[0], r11, r8 -; CHECK-NEXT: vmov q3[2], q3[0], r0, r5 -; CHECK-NEXT: vmov q1[3], q1[1], r2, r10 -; CHECK-NEXT: vmov q4[3], q4[1], r11, r8 -; CHECK-NEXT: vand q1, q1, q4 -; CHECK-NEXT: vbic q4, q2, q4 -; CHECK-NEXT: vorr q1, q1, q4 -; CHECK-NEXT: vmov q3[3], q3[1], r1, r7 -; CHECK-NEXT: vmov r2, r3, d2 -; CHECK-NEXT: sbcs r7, r7, #0 -; CHECK-NEXT: vmov r6, r5, d3 -; CHECK-NEXT: mov.w r7, #0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r7, #1 -; CHECK-NEXT: cmp r7, #0 -; CHECK-NEXT: csetm r7, ne -; CHECK-NEXT: subs r0, r0, r4 -; CHECK-NEXT: sbcs r0, r1, #0 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r1, ne -; CHECK-NEXT: mov.w r0, #-1 -; CHECK-NEXT: vmov q4[2], q4[0], r1, r7 -; CHECK-NEXT: vmov q4[3], q4[1], r1, r7 -; CHECK-NEXT: vand q3, q3, q4 -; CHECK-NEXT: vbic q2, q2, q4 -; CHECK-NEXT: vorr q2, q3, q2 -; CHECK-NEXT: vmov r1, r7, d5 -; CHECK-NEXT: rsbs.w r2, r2, #-2147483648 -; CHECK-NEXT: sbcs.w r2, r0, r3 -; CHECK-NEXT: mov.w r2, #0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r2, #1 -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csetm r2, ne -; CHECK-NEXT: rsbs.w r3, r6, #-2147483648 -; CHECK-NEXT: sbcs.w r3, r0, r5 -; CHECK-NEXT: vmov r6, r5, d4 -; CHECK-NEXT: mov.w r3, #0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r3, #1 -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csetm r3, ne -; CHECK-NEXT: rsbs.w r1, r1, #-2147483648 
-; CHECK-NEXT: sbcs.w r1, r0, r7 -; CHECK-NEXT: mov.w r1, #0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #1 -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csetm r1, ne -; CHECK-NEXT: rsbs.w r7, r6, #-2147483648 -; CHECK-NEXT: sbcs r0, r5 -; CHECK-NEXT: mov.w r0, #0 -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r0, #1 -; CHECK-NEXT: cmp r0, #0 -; CHECK-NEXT: csetm r0, ne -; CHECK-NEXT: vmov.32 q3[1], r0 -; CHECK-NEXT: vmov q3[2], q3[0], r0, r1 -; CHECK-NEXT: vbic q4, q0, q3 -; CHECK-NEXT: vand q2, q2, q3 -; CHECK-NEXT: vmov.32 q3[1], r2 -; CHECK-NEXT: vorr q2, q2, q4 -; CHECK-NEXT: vmov q3[2], q3[0], r2, r3 -; CHECK-NEXT: vbic q0, q0, q3 -; CHECK-NEXT: vand q1, q1, q3 -; CHECK-NEXT: vorr q0, q1, q0 -; CHECK-NEXT: vmov.f32 s1, s2 -; CHECK-NEXT: vmov.f32 s2, s8 -; CHECK-NEXT: vmov.f32 s3, s10 -; CHECK-NEXT: add sp, #8 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} -; CHECK-NEXT: .p2align 4 -; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI3_0: -; CHECK-NEXT: .long 2147483648 @ 0x80000000 -; CHECK-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-NEXT: .long 2147483648 @ 0x80000000 -; CHECK-NEXT: .long 4294967295 @ 0xffffffff -; CHECK-NEXT: .LCPI3_1: -; CHECK-NEXT: .long 2147483647 @ 0x7fffffff -; CHECK-NEXT: .long 0 @ 0x0 -; CHECK-NEXT: .long 2147483647 @ 0x7fffffff -; CHECK-NEXT: .long 0 @ 0x0 +; CHECK-NEXT: vcvt.s32.f32 q0, q0 +; CHECK-NEXT: bx lr entry: %conv = fptosi <4 x float> %x to <4 x i64> %0 = icmp slt <4 x i64> %conv, @@ -1140,42 +1007,7 @@ define arm_aapcs_vfpcc <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-LABEL: stest_f16i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: vmovx.f16 s12, s2 -; CHECK-NEXT: vmovx.f16 s10, s3 -; CHECK-NEXT: vcvt.s32.f16 s14, s3 -; CHECK-NEXT: vcvt.s32.f16 s2, s2 -; CHECK-NEXT: vcvt.s32.f16 s10, s10 -; CHECK-NEXT: vcvt.s32.f16 s12, s12 -; CHECK-NEXT: vmov r1, s14 -; 
CHECK-NEXT: vmovx.f16 s6, s0 -; CHECK-NEXT: vmov r2, s2 -; CHECK-NEXT: vmovx.f16 s4, s1 -; CHECK-NEXT: vmov q4[2], q4[0], r2, r1 -; CHECK-NEXT: vcvt.s32.f16 s8, s1 -; CHECK-NEXT: vcvt.s32.f16 s0, s0 -; CHECK-NEXT: vmov r1, s10 -; CHECK-NEXT: vmov r2, s12 -; CHECK-NEXT: vcvt.s32.f16 s4, s4 -; CHECK-NEXT: vmov q4[3], q4[1], r2, r1 -; CHECK-NEXT: vcvt.s32.f16 s6, s6 -; CHECK-NEXT: vmov r1, s8 -; CHECK-NEXT: mov r0, sp -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vqmovnb.s32 q3, q4 -; CHECK-NEXT: vmov q0[2], q0[0], r2, r1 -; CHECK-NEXT: vmov r1, s4 -; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: vstrh.32 q3, [r0, #8] -; CHECK-NEXT: vmov q0[3], q0[1], r2, r1 -; CHECK-NEXT: vqmovnb.s32 q0, q0 -; CHECK-NEXT: vstrh.32 q0, [r0] -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: add sp, #16 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vcvt.s16.f16 q0, q0 ; CHECK-NEXT: bx lr entry: %conv = fptosi <8 x half> %x to <8 x i32> @@ -2060,117 +1892,8 @@ define arm_aapcs_vfpcc <4 x i32> @stest_f32i32_mm(<4 x float> %x) { ; CHECK-LABEL: stest_f32i32_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} -; CHECK-NEXT: .pad #4 -; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov.w r6, #-2147483648 -; CHECK-NEXT: vmov r0, r10, d9 -; CHECK-NEXT: mvn r7, #-2147483648 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: mov.w r8, #0 -; CHECK-NEXT: cset r2, mi -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r2, r0, r7, ne -; CHECK-NEXT: cmp r0, r7 -; CHECK-NEXT: csel r0, r0, r7, lo -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: cset r3, eq -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csel r1, r1, r8, mi -; CHECK-NEXT: cmp.w r1, #-1 -; CHECK-NEXT: cset r2, gt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r2, r0, r6, ne -; CHECK-NEXT: cmp.w r0, #-2147483648 -; 
CHECK-NEXT: csel r3, r0, r6, hi -; CHECK-NEXT: adds r0, r1, #1 -; CHECK-NEXT: vmov r0, r11, d8 -; CHECK-NEXT: cset r1, eq -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csel r9, r3, r2, ne -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: cset r2, mi -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r2, r0, r7, ne -; CHECK-NEXT: cmp r0, r7 -; CHECK-NEXT: csel r0, r0, r7, lo -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: cset r3, eq -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csel r1, r1, r8, mi -; CHECK-NEXT: cmp.w r1, #-1 -; CHECK-NEXT: cset r2, gt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r2, r0, r6, ne -; CHECK-NEXT: cmp.w r0, #-2147483648 -; CHECK-NEXT: csel r0, r0, r6, hi -; CHECK-NEXT: adds r1, #1 -; CHECK-NEXT: cset r1, eq -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csel r4, r0, r2, ne -; CHECK-NEXT: mov r0, r10 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: cset r2, mi -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r2, r0, r7, ne -; CHECK-NEXT: cmp r0, r7 -; CHECK-NEXT: csel r0, r0, r7, lo -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: cset r3, eq -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csel r1, r1, r8, mi -; CHECK-NEXT: cmp.w r1, #-1 -; CHECK-NEXT: cset r2, gt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r2, r0, r6, ne -; CHECK-NEXT: cmp.w r0, #-2147483648 -; CHECK-NEXT: csel r0, r0, r6, hi -; CHECK-NEXT: adds r1, #1 -; CHECK-NEXT: cset r1, eq -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csel r5, r0, r2, ne -; CHECK-NEXT: mov r0, r11 -; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: vmov q0[2], q0[0], r4, r9 -; CHECK-NEXT: cset r2, mi -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r2, r0, r7, ne -; CHECK-NEXT: cmp r0, r7 -; CHECK-NEXT: csel r0, r0, r7, lo -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: cset r3, eq -; CHECK-NEXT: cmp r3, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: cmp r1, #0 -; 
CHECK-NEXT: csel r1, r1, r8, mi -; CHECK-NEXT: cmp.w r1, #-1 -; CHECK-NEXT: cset r2, gt -; CHECK-NEXT: cmp r2, #0 -; CHECK-NEXT: csel r2, r0, r6, ne -; CHECK-NEXT: cmp.w r0, #-2147483648 -; CHECK-NEXT: csel r0, r0, r6, hi -; CHECK-NEXT: adds r1, #1 -; CHECK-NEXT: cset r1, eq -; CHECK-NEXT: cmp r1, #0 -; CHECK-NEXT: csel r0, r0, r2, ne -; CHECK-NEXT: vmov q0[3], q0[1], r0, r5 -; CHECK-NEXT: vpop {d8, d9} -; CHECK-NEXT: add sp, #4 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +; CHECK-NEXT: vcvt.s32.f32 q0, q0 +; CHECK-NEXT: bx lr entry: %conv = fptosi <4 x float> %x to <4 x i64> %spec.store.select = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %conv, <4 x i64> ) @@ -2851,42 +2574,7 @@ define arm_aapcs_vfpcc <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-LABEL: stest_f16i16_mm: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: .pad #16 -; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: vmovx.f16 s12, s2 -; CHECK-NEXT: vmovx.f16 s10, s3 -; CHECK-NEXT: vcvt.s32.f16 s14, s3 -; CHECK-NEXT: vcvt.s32.f16 s2, s2 -; CHECK-NEXT: vcvt.s32.f16 s10, s10 -; CHECK-NEXT: vcvt.s32.f16 s12, s12 -; CHECK-NEXT: vmov r1, s14 -; CHECK-NEXT: vmovx.f16 s6, s0 -; CHECK-NEXT: vmov r2, s2 -; CHECK-NEXT: vmovx.f16 s4, s1 -; CHECK-NEXT: vmov q4[2], q4[0], r2, r1 -; CHECK-NEXT: vcvt.s32.f16 s8, s1 -; CHECK-NEXT: vcvt.s32.f16 s0, s0 -; CHECK-NEXT: vmov r1, s10 -; CHECK-NEXT: vmov r2, s12 -; CHECK-NEXT: vcvt.s32.f16 s4, s4 -; CHECK-NEXT: vmov q4[3], q4[1], r2, r1 -; CHECK-NEXT: vcvt.s32.f16 s6, s6 -; CHECK-NEXT: vmov r1, s8 -; CHECK-NEXT: mov r0, sp -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vqmovnb.s32 q3, q4 -; CHECK-NEXT: vmov q0[2], q0[0], r2, r1 -; CHECK-NEXT: vmov r1, s4 -; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: vstrh.32 q3, [r0, #8] -; CHECK-NEXT: vmov q0[3], q0[1], r2, r1 -; CHECK-NEXT: vqmovnb.s32 q0, q0 -; CHECK-NEXT: vstrh.32 q0, [r0] -; CHECK-NEXT: vldrw.u32 q0, [r0] -; CHECK-NEXT: add sp, #16 -; CHECK-NEXT: vpop {d8, d9} +; 
CHECK-NEXT: vcvt.s16.f16 q0, q0 ; CHECK-NEXT: bx lr entry: %conv = fptosi <8 x half> %x to <8 x i32> Index: llvm/test/CodeGen/WebAssembly/fpclamptosat.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/fpclamptosat.ll +++ llvm/test/CodeGen/WebAssembly/fpclamptosat.ll @@ -6,23 +6,9 @@ define i32 @stest_f64i32(double %x) { ; CHECK-LABEL: stest_f64i32: ; CHECK: .functype stest_f64i32 (f64) -> (i32) -; CHECK-NEXT: .local i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i64.trunc_sat_f64_s -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 2147483647 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 2147483647 -; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const -2147483648 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const -2147483648 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: i32.wrap_i64 +; CHECK-NEXT: i32.trunc_sat_f64_s ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi double %x to i64 @@ -91,23 +77,9 @@ define i32 @stest_f32i32(float %x) { ; CHECK-LABEL: stest_f32i32: ; CHECK: .functype stest_f32i32 (f32) -> (i32) -; CHECK-NEXT: .local i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 2147483647 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 2147483647 -; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const -2147483648 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const -2147483648 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: i32.wrap_i64 +; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi float %x to i64 @@ -176,25 +148,11 @@ define i32 @stest_f16i32(half %x) { ; CHECK-LABEL: stest_f16i32: ; CHECK: .functype stest_f16i32 (f32) -> (i32) -; CHECK-NEXT: .local i64 ; CHECK-NEXT: # %bb.0: # %entry ; 
CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 2147483647 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 2147483647 -; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const -2147483648 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const -2147483648 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: i32.wrap_i64 +; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi half %x to i64 @@ -523,58 +481,9 @@ define i64 @stest_f64i64(double %x) { ; CHECK-LABEL: stest_f64i64: ; CHECK: .functype stest_f64i64 (f64) -> (i64) -; CHECK-NEXT: .local i32, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry -; CHECK-NEXT: global.get __stack_pointer -; CHECK-NEXT: i32.const 16 -; CHECK-NEXT: i32.sub -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __fixdfti -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 8 -; CHECK-NEXT: i32.add -; CHECK-NEXT: i64.load 0 -; CHECK-NEXT: local.set 2 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.load 0 -; CHECK-NEXT: local.set 3 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 16 -; CHECK-NEXT: i32.add -; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 9223372036854775807 -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 9223372036854775807 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.eqz -; CHECK-NEXT: i32.select -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 3 -; CHECK-NEXT: i64.const -9223372036854775808 -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const -9223372036854775808 -; CHECK-NEXT: i64.gt_u -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 0 -; 
CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 2 -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i32.select -; CHECK-NEXT: i64.select +; CHECK-NEXT: i64.trunc_sat_f64_s ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi double %x to i128 @@ -687,58 +596,9 @@ define i64 @stest_f32i64(float %x) { ; CHECK-LABEL: stest_f32i64: ; CHECK: .functype stest_f32i64 (f32) -> (i64) -; CHECK-NEXT: .local i32, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry -; CHECK-NEXT: global.get __stack_pointer -; CHECK-NEXT: i32.const 16 -; CHECK-NEXT: i32.sub -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __fixsfti -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 8 -; CHECK-NEXT: i32.add -; CHECK-NEXT: i64.load 0 -; CHECK-NEXT: local.set 2 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.load 0 -; CHECK-NEXT: local.set 3 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 16 -; CHECK-NEXT: i32.add -; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 9223372036854775807 -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 9223372036854775807 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.eqz -; CHECK-NEXT: i32.select -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 3 -; CHECK-NEXT: i64.const -9223372036854775808 -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const -9223372036854775808 -; CHECK-NEXT: i64.gt_u -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 2 -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i32.select -; CHECK-NEXT: 
i64.select +; CHECK-NEXT: i64.trunc_sat_f32_s ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi float %x to i128 @@ -851,60 +711,11 @@ define i64 @stest_f16i64(half %x) { ; CHECK-LABEL: stest_f16i64: ; CHECK: .functype stest_f16i64 (f32) -> (i64) -; CHECK-NEXT: .local i32, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry -; CHECK-NEXT: global.get __stack_pointer -; CHECK-NEXT: i32.const 16 -; CHECK-NEXT: i32.sub -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: call __fixsfti -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 8 -; CHECK-NEXT: i32.add -; CHECK-NEXT: i64.load 0 -; CHECK-NEXT: local.set 2 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.load 0 -; CHECK-NEXT: local.set 3 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 16 -; CHECK-NEXT: i32.add -; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 9223372036854775807 -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 9223372036854775807 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.eqz -; CHECK-NEXT: i32.select -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 3 -; CHECK-NEXT: i64.const -9223372036854775808 -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const -9223372036854775808 -; CHECK-NEXT: i64.gt_u -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 2 -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i32.select -; CHECK-NEXT: i64.select +; CHECK-NEXT: i64.trunc_sat_f32_s ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi half %x to i128 @@ -1026,23 +837,9 @@ define i32 @stest_f64i32_mm(double %x) { ; 
CHECK-LABEL: stest_f64i32_mm: ; CHECK: .functype stest_f64i32_mm (f64) -> (i32) -; CHECK-NEXT: .local i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i64.trunc_sat_f64_s -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 2147483647 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 2147483647 -; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const -2147483648 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const -2147483648 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: i32.wrap_i64 +; CHECK-NEXT: i32.trunc_sat_f64_s ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi double %x to i64 @@ -1106,23 +903,9 @@ define i32 @stest_f32i32_mm(float %x) { ; CHECK-LABEL: stest_f32i32_mm: ; CHECK: .functype stest_f32i32_mm (f32) -> (i32) -; CHECK-NEXT: .local i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 2147483647 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 2147483647 -; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const -2147483648 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const -2147483648 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: i32.wrap_i64 +; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi float %x to i64 @@ -1186,25 +969,11 @@ define i32 @stest_f16i32_mm(half %x) { ; CHECK-LABEL: stest_f16i32_mm: ; CHECK: .functype stest_f16i32_mm (f32) -> (i32) -; CHECK-NEXT: .local i64 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 2147483647 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const 2147483647 -; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: i64.const 
-2147483648 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.const -2147483648 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: i32.wrap_i64 +; CHECK-NEXT: i32.trunc_sat_f32_s ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi half %x to i64 @@ -1513,64 +1282,9 @@ define i64 @stest_f64i64_mm(double %x) { ; CHECK-LABEL: stest_f64i64_mm: ; CHECK: .functype stest_f64i64_mm (f64) -> (i64) -; CHECK-NEXT: .local i32, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry -; CHECK-NEXT: global.get __stack_pointer -; CHECK-NEXT: i32.const 16 -; CHECK-NEXT: i32.sub -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __fixdfti -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 8 -; CHECK-NEXT: i32.add -; CHECK-NEXT: i64.load 0 -; CHECK-NEXT: local.set 2 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.load 0 -; CHECK-NEXT: local.set 3 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 16 -; CHECK-NEXT: i32.add -; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 9223372036854775807 -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 9223372036854775807 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 9223372036854775807 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.eqz -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 3 -; CHECK-NEXT: i64.const -9223372036854775808 -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const -9223372036854775808 -; CHECK-NEXT: i64.gt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const -9223372036854775808 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 63 -; CHECK-NEXT: i64.shr_s -; CHECK-NEXT: i64.and -; CHECK-NEXT: local.tee 2 -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select 
-; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select +; CHECK-NEXT: i64.trunc_sat_f64_s ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi double %x to i128 @@ -1686,64 +1400,9 @@ define i64 @stest_f32i64_mm(float %x) { ; CHECK-LABEL: stest_f32i64_mm: ; CHECK: .functype stest_f32i64_mm (f32) -> (i64) -; CHECK-NEXT: .local i32, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry -; CHECK-NEXT: global.get __stack_pointer -; CHECK-NEXT: i32.const 16 -; CHECK-NEXT: i32.sub -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: call __fixsfti -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 8 -; CHECK-NEXT: i32.add -; CHECK-NEXT: i64.load 0 -; CHECK-NEXT: local.set 2 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.load 0 -; CHECK-NEXT: local.set 3 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 16 -; CHECK-NEXT: i32.add -; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 9223372036854775807 -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 9223372036854775807 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 9223372036854775807 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.eqz -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 3 -; CHECK-NEXT: i64.const -9223372036854775808 -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const -9223372036854775808 -; CHECK-NEXT: i64.gt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const -9223372036854775808 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 63 -; CHECK-NEXT: i64.shr_s -; CHECK-NEXT: i64.and -; CHECK-NEXT: local.tee 2 -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: 
i64.const -1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select +; CHECK-NEXT: i64.trunc_sat_f32_s ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi float %x to i128 @@ -1859,66 +1518,11 @@ define i64 @stest_f16i64_mm(half %x) { ; CHECK-LABEL: stest_f16i64_mm: ; CHECK: .functype stest_f16i64_mm (f32) -> (i64) -; CHECK-NEXT: .local i32, i64, i64 ; CHECK-NEXT: # %bb.0: # %entry -; CHECK-NEXT: global.get __stack_pointer -; CHECK-NEXT: i32.const 16 -; CHECK-NEXT: i32.sub -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: local.get 1 ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: call __fixsfti -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 8 -; CHECK-NEXT: i32.add -; CHECK-NEXT: i64.load 0 -; CHECK-NEXT: local.set 2 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.load 0 -; CHECK-NEXT: local.set 3 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i32.const 16 -; CHECK-NEXT: i32.add -; CHECK-NEXT: global.set __stack_pointer -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 9223372036854775807 -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 9223372036854775807 -; CHECK-NEXT: i64.lt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const 9223372036854775807 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 0 -; CHECK-NEXT: i64.lt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.eqz -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.tee 3 -; CHECK-NEXT: i64.const -9223372036854775808 -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const -9223372036854775808 -; CHECK-NEXT: i64.gt_u -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.const -9223372036854775808 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.const 63 -; CHECK-NEXT: i64.shr_s -; CHECK-NEXT: i64.and -; CHECK-NEXT: local.tee 2 -; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.gt_s -; CHECK-NEXT: i64.select -; CHECK-NEXT: local.get 2 
-; CHECK-NEXT: i64.const -1 -; CHECK-NEXT: i64.eq -; CHECK-NEXT: i64.select +; CHECK-NEXT: i64.trunc_sat_f32_s ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi half %x to i128 Index: llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll =================================================================== --- llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll +++ llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll @@ -128,51 +128,9 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) { ; CHECK-LABEL: stest_f32i32: ; CHECK: .functype stest_f32i32 (v128) -> (v128) -; CHECK-NEXT: .local v128, v128, v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: f32x4.extract_lane 0 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: f32x4.extract_lane 1 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: v128.const 2147483647, 2147483647 -; CHECK-NEXT: local.tee 2 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64x2.lt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: local.tee 3 -; CHECK-NEXT: v128.const -2147483648, -2147483648 -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64x2.gt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: f32x4.extract_lane 2 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: f32x4.extract_lane 3 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: local.tee 0 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64x2.lt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: local.tee 0 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64x2.gt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; 
CHECK-NEXT: i32x4.trunc_sat_f32x4_s ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi <4 x float> %x to <4 x i64> @@ -314,16 +272,7 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-LABEL: stest_f16i32: ; CHECK: .functype stest_f16i32 (f32, f32, f32, f32) -> (v128) -; CHECK-NEXT: .local v128, v128, v128 ; CHECK-NEXT: # %bb.0: # %entry -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 -; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 3 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 -; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 2 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 @@ -331,44 +280,21 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.splat +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: local.tee 4 -; CHECK-NEXT: v128.const 2147483647, 2147483647 -; CHECK-NEXT: local.tee 5 -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64x2.lt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: local.tee 6 -; CHECK-NEXT: v128.const -2147483648, -2147483648 -; CHECK-NEXT: local.tee 4 -; CHECK-NEXT: local.get 6 -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64x2.gt_s -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.splat +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: local.tee 6 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: local.get 6 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64x2.lt_s -; CHECK-NEXT: 
v128.bitselect -; CHECK-NEXT: local.tee 5 -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64x2.gt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi <4 x half> %x to <4 x i64> @@ -1870,51 +1796,9 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) { ; CHECK-LABEL: stest_f32i32_mm: ; CHECK: .functype stest_f32i32_mm (v128) -> (v128) -; CHECK-NEXT: .local v128, v128, v128 ; CHECK-NEXT: # %bb.0: # %entry ; CHECK-NEXT: local.get 0 -; CHECK-NEXT: f32x4.extract_lane 0 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: f32x4.extract_lane 1 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: v128.const 2147483647, 2147483647 -; CHECK-NEXT: local.tee 2 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64x2.lt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: local.tee 3 -; CHECK-NEXT: v128.const -2147483648, -2147483648 -; CHECK-NEXT: local.tee 1 -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64x2.gt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: f32x4.extract_lane 2 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.splat -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: f32x4.extract_lane 3 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: local.tee 0 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64x2.lt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: local.tee 0 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: local.get 0 -; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64x2.gt_s -; CHECK-NEXT: v128.bitselect -; 
CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK-NEXT: i32x4.trunc_sat_f32x4_s ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi <4 x float> %x to <4 x i64> @@ -2051,16 +1935,7 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-LABEL: stest_f16i32_mm: ; CHECK: .functype stest_f16i32_mm (f32, f32, f32, f32) -> (v128) -; CHECK-NEXT: .local v128, v128, v128 ; CHECK-NEXT: # %bb.0: # %entry -; CHECK-NEXT: local.get 3 -; CHECK-NEXT: call __truncsfhf2 -; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 3 -; CHECK-NEXT: local.get 2 -; CHECK-NEXT: call __truncsfhf2 -; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: local.set 2 ; CHECK-NEXT: local.get 1 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 @@ -2068,44 +1943,21 @@ ; CHECK-NEXT: local.get 0 ; CHECK-NEXT: call __truncsfhf2 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.splat +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i32x4.splat ; CHECK-NEXT: local.get 1 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: local.tee 4 -; CHECK-NEXT: v128.const 2147483647, 2147483647 -; CHECK-NEXT: local.tee 5 -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64x2.lt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: local.tee 6 -; CHECK-NEXT: v128.const -2147483648, -2147483648 -; CHECK-NEXT: local.tee 4 -; CHECK-NEXT: local.get 6 -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64x2.gt_s -; CHECK-NEXT: v128.bitselect +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i32x4.replace_lane 1 ; CHECK-NEXT: local.get 2 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.splat +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i32x4.replace_lane 2 ; CHECK-NEXT: local.get 3 -; CHECK-NEXT: i64.trunc_sat_f32_s -; CHECK-NEXT: i64x2.replace_lane 1 -; CHECK-NEXT: local.tee 6 -; CHECK-NEXT: local.get 5 -; 
CHECK-NEXT: local.get 6 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: i64x2.lt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: local.tee 5 -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: local.get 5 -; CHECK-NEXT: local.get 4 -; CHECK-NEXT: i64x2.gt_s -; CHECK-NEXT: v128.bitselect -; CHECK-NEXT: i8x16.shuffle 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19, 24, 25, 26, 27 +; CHECK-NEXT: call __truncsfhf2 +; CHECK-NEXT: call __extendhfsf2 +; CHECK-NEXT: i32.trunc_sat_f32_s +; CHECK-NEXT: i32x4.replace_lane 3 ; CHECK-NEXT: # fallthrough-return entry: %conv = fptosi <4 x half> %x to <4 x i64> Index: llvm/test/CodeGen/X86/fpclamptosat.ll =================================================================== --- llvm/test/CodeGen/X86/fpclamptosat.ll +++ llvm/test/CodeGen/X86/fpclamptosat.ll @@ -6,13 +6,12 @@ define i32 @stest_f64i32(double %x) { ; CHECK-LABEL: stest_f64i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttsd2si %xmm0, %rax -; CHECK-NEXT: cmpq $2147483647, %rax # imm = 0x7FFFFFFF -; CHECK-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF -; CHECK-NEXT: cmovlq %rax, %rcx -; CHECK-NEXT: cmpq $-2147483647, %rcx # imm = 0x80000001 -; CHECK-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; CHECK-NEXT: cmovgel %ecx, %eax +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: ucomisd %xmm0, %xmm0 +; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: cvttsd2si %xmm0, %ecx +; CHECK-NEXT: cmovnpl %ecx, %eax ; CHECK-NEXT: retq entry: %conv = fptosi double %x to i64 @@ -72,13 +71,13 @@ define i32 @stest_f32i32(float %x) { ; CHECK-LABEL: stest_f32i32: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttss2si %xmm0, %rax -; CHECK-NEXT: cmpq $2147483647, %rax # imm = 0x7FFFFFFF +; CHECK-NEXT: cvttss2si %xmm0, %eax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF -; CHECK-NEXT: cmovlq %rax, %rcx -; CHECK-NEXT: cmpq $-2147483647, %rcx # imm = 0x80000001 -; 
CHECK-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; CHECK-NEXT: cmovgel %ecx, %eax +; CHECK-NEXT: cmovbel %eax, %ecx +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: cmovnpl %ecx, %eax ; CHECK-NEXT: retq entry: %conv = fptosi float %x to i64 @@ -142,13 +141,13 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: movzwl %di, %edi ; CHECK-NEXT: callq __gnu_h2f_ieee@PLT -; CHECK-NEXT: cvttss2si %xmm0, %rax -; CHECK-NEXT: cmpq $2147483647, %rax # imm = 0x7FFFFFFF +; CHECK-NEXT: cvttss2si %xmm0, %eax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF -; CHECK-NEXT: cmovlq %rax, %rcx -; CHECK-NEXT: cmpq $-2147483647, %rcx # imm = 0x80000001 -; CHECK-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; CHECK-NEXT: cmovgel %ecx, %eax +; CHECK-NEXT: cmovbel %eax, %ecx +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: cmovnpl %ecx, %eax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq @@ -224,13 +223,11 @@ define i16 @stest_f64i16(double %x) { ; CHECK-LABEL: stest_f64i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: maxsd %xmm0, %xmm1 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: minsd %xmm1, %xmm0 ; CHECK-NEXT: cvttsd2si %xmm0, %eax -; CHECK-NEXT: cmpl $32767, %eax # imm = 0x7FFF -; CHECK-NEXT: movl $32767, %ecx # imm = 0x7FFF -; CHECK-NEXT: cmovll %eax, %ecx -; CHECK-NEXT: cmpl $-32767, %ecx # imm = 0x8001 -; CHECK-NEXT: movl $32768, %eax # imm = 0x8000 -; CHECK-NEXT: cmovgel %ecx, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq entry: @@ -285,13 +282,11 @@ define i16 @stest_f32i16(float %x) { ; CHECK-LABEL: stest_f32i16: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: maxss %xmm0, %xmm1 +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: minss 
%xmm1, %xmm0 ; CHECK-NEXT: cvttss2si %xmm0, %eax -; CHECK-NEXT: cmpl $32767, %eax # imm = 0x7FFF -; CHECK-NEXT: movl $32767, %ecx # imm = 0x7FFF -; CHECK-NEXT: cmovll %eax, %ecx -; CHECK-NEXT: cmpl $-32767, %ecx # imm = 0x8001 -; CHECK-NEXT: movl $32768, %eax # imm = 0x8000 -; CHECK-NEXT: cmovgel %ecx, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq entry: @@ -350,13 +345,11 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: movzwl %di, %edi ; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: maxss %xmm0, %xmm1 +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: minss %xmm1, %xmm0 ; CHECK-NEXT: cvttss2si %xmm0, %eax -; CHECK-NEXT: cmpl $32767, %eax # imm = 0x7FFF -; CHECK-NEXT: movl $32767, %ecx # imm = 0x7FFF -; CHECK-NEXT: cmovll %eax, %ecx -; CHECK-NEXT: cmpl $-32767, %ecx # imm = 0x8001 -; CHECK-NEXT: movl $32768, %eax # imm = 0x8000 -; CHECK-NEXT: cmovgel %ecx, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 @@ -427,24 +420,13 @@ define i64 @stest_f64i64(double %x) { ; CHECK-LABEL: stest_f64i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: callq __fixdfti@PLT -; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: cvttsd2si %xmm0, %rax +; CHECK-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF -; CHECK-NEXT: cmpq %rcx, %rax -; CHECK-NEXT: movq %rdx, %rdi -; CHECK-NEXT: sbbq $0, %rdi -; CHECK-NEXT: cmovlq %rdx, %rsi -; CHECK-NEXT: cmovlq %rax, %rcx -; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; CHECK-NEXT: cmpq %rcx, %rax -; CHECK-NEXT: movq $-1, %rdx -; CHECK-NEXT: sbbq %rsi, %rdx -; CHECK-NEXT: cmovgeq %rax, %rcx -; CHECK-NEXT: movq %rcx, %rax -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: .cfi_def_cfa_offset 8 +; 
CHECK-NEXT: cmovbeq %rax, %rcx +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: ucomisd %xmm0, %xmm0 +; CHECK-NEXT: cmovnpq %rcx, %rax ; CHECK-NEXT: retq entry: %conv = fptosi double %x to i128 @@ -508,24 +490,13 @@ define i64 @stest_f32i64(float %x) { ; CHECK-LABEL: stest_f32i64: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: callq __fixsfti@PLT -; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: cvttss2si %xmm0, %rax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF -; CHECK-NEXT: cmpq %rcx, %rax -; CHECK-NEXT: movq %rdx, %rdi -; CHECK-NEXT: sbbq $0, %rdi -; CHECK-NEXT: cmovlq %rdx, %rsi -; CHECK-NEXT: cmovlq %rax, %rcx -; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; CHECK-NEXT: cmpq %rcx, %rax -; CHECK-NEXT: movq $-1, %rdx -; CHECK-NEXT: sbbq %rsi, %rdx -; CHECK-NEXT: cmovgeq %rax, %rcx -; CHECK-NEXT: movq %rcx, %rax -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: cmovbeq %rax, %rcx +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: cmovnpq %rcx, %rax ; CHECK-NEXT: retq entry: %conv = fptosi float %x to i128 @@ -593,20 +564,13 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: movzwl %di, %edi ; CHECK-NEXT: callq __gnu_h2f_ieee@PLT -; CHECK-NEXT: callq __fixsfti@PLT -; CHECK-NEXT: xorl %esi, %esi +; CHECK-NEXT: cvttss2si %xmm0, %rax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF -; CHECK-NEXT: cmpq %rcx, %rax -; CHECK-NEXT: movq %rdx, %rdi -; CHECK-NEXT: sbbq $0, %rdi -; CHECK-NEXT: cmovlq %rdx, %rsi -; CHECK-NEXT: cmovlq %rax, %rcx -; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; CHECK-NEXT: cmpq %rcx, %rax -; CHECK-NEXT: movq $-1, %rdx -; CHECK-NEXT: sbbq %rsi, %rdx -; CHECK-NEXT: cmovgeq %rax, %rcx -; 
CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: cmovbeq %rax, %rcx +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: cmovnpq %rcx, %rax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq @@ -681,14 +645,12 @@ define i32 @stest_f64i32_mm(double %x) { ; CHECK-LABEL: stest_f64i32_mm: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttsd2si %xmm0, %rax -; CHECK-NEXT: cmpq $2147483647, %rax # imm = 0x7FFFFFFF -; CHECK-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF -; CHECK-NEXT: cmovlq %rax, %rcx -; CHECK-NEXT: cmpq $-2147483647, %rcx # imm = 0x80000001 -; CHECK-NEXT: movq $-2147483648, %rax # imm = 0x80000000 -; CHECK-NEXT: cmovgeq %rcx, %rax -; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: ucomisd %xmm0, %xmm0 +; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; CHECK-NEXT: cvttsd2si %xmm0, %ecx +; CHECK-NEXT: cmovnpl %ecx, %eax ; CHECK-NEXT: retq entry: %conv = fptosi double %x to i64 @@ -743,14 +705,13 @@ define i32 @stest_f32i32_mm(float %x) { ; CHECK-LABEL: stest_f32i32_mm: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cvttss2si %xmm0, %rax -; CHECK-NEXT: cmpq $2147483647, %rax # imm = 0x7FFFFFFF +; CHECK-NEXT: cvttss2si %xmm0, %eax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF -; CHECK-NEXT: cmovlq %rax, %rcx -; CHECK-NEXT: cmpq $-2147483647, %rcx # imm = 0x80000001 -; CHECK-NEXT: movq $-2147483648, %rax # imm = 0x80000000 -; CHECK-NEXT: cmovgeq %rcx, %rax -; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: cmovbel %eax, %ecx +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: cmovnpl %ecx, %eax ; CHECK-NEXT: retq entry: %conv = fptosi float %x to i64 @@ -809,14 +770,13 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: movzwl %di, %edi ; CHECK-NEXT: callq __gnu_h2f_ieee@PLT -; 
CHECK-NEXT: cvttss2si %xmm0, %rax -; CHECK-NEXT: cmpq $2147483647, %rax # imm = 0x7FFFFFFF +; CHECK-NEXT: cvttss2si %xmm0, %eax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF -; CHECK-NEXT: cmovlq %rax, %rcx -; CHECK-NEXT: cmpq $-2147483647, %rcx # imm = 0x80000001 -; CHECK-NEXT: movq $-2147483648, %rax # imm = 0x80000000 -; CHECK-NEXT: cmovgeq %rcx, %rax -; CHECK-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK-NEXT: cmovbel %eax, %ecx +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: cmovnpl %ecx, %eax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq @@ -887,13 +847,11 @@ define i16 @stest_f64i16_mm(double %x) { ; CHECK-LABEL: stest_f64i16_mm: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: maxsd %xmm0, %xmm1 +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: minsd %xmm1, %xmm0 ; CHECK-NEXT: cvttsd2si %xmm0, %eax -; CHECK-NEXT: cmpl $32767, %eax # imm = 0x7FFF -; CHECK-NEXT: movl $32767, %ecx # imm = 0x7FFF -; CHECK-NEXT: cmovll %eax, %ecx -; CHECK-NEXT: cmpl $-32767, %ecx # imm = 0x8001 -; CHECK-NEXT: movl $-32768, %eax # imm = 0x8000 -; CHECK-NEXT: cmovgel %ecx, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: retq entry: @@ -943,13 +901,11 @@ define i16 @stest_f32i16_mm(float %x) { ; CHECK-LABEL: stest_f32i16_mm: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: maxss %xmm0, %xmm1 +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: minss %xmm1, %xmm0 ; CHECK-NEXT: cvttss2si %xmm0, %eax -; CHECK-NEXT: cmpl $32767, %eax # imm = 0x7FFF -; CHECK-NEXT: movl $32767, %ecx # imm = 0x7FFF -; CHECK-NEXT: cmovll %eax, %ecx -; CHECK-NEXT: cmpl $-32767, %ecx # imm = 0x8001 -; CHECK-NEXT: movl $-32768, %eax # imm = 0x8000 -; CHECK-NEXT: cmovgel %ecx, %eax ; CHECK-NEXT: # kill: def $ax 
killed $ax killed $eax ; CHECK-NEXT: retq entry: @@ -1003,13 +959,11 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: movzwl %di, %edi ; CHECK-NEXT: callq __gnu_h2f_ieee@PLT +; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero +; CHECK-NEXT: maxss %xmm0, %xmm1 +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: minss %xmm1, %xmm0 ; CHECK-NEXT: cvttss2si %xmm0, %eax -; CHECK-NEXT: cmpl $32767, %eax # imm = 0x7FFF -; CHECK-NEXT: movl $32767, %ecx # imm = 0x7FFF -; CHECK-NEXT: cmovll %eax, %ecx -; CHECK-NEXT: cmpl $-32767, %ecx # imm = 0x8001 -; CHECK-NEXT: movl $-32768, %eax # imm = 0x8000 -; CHECK-NEXT: cmovgel %ecx, %eax ; CHECK-NEXT: # kill: def $ax killed $ax killed $eax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 @@ -1075,29 +1029,13 @@ define i64 @stest_f64i64_mm(double %x) { ; CHECK-LABEL: stest_f64i64_mm: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: callq __fixdfti@PLT +; CHECK-NEXT: cvttsd2si %xmm0, %rax +; CHECK-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF -; CHECK-NEXT: cmpq %rcx, %rax -; CHECK-NEXT: movq %rcx, %rsi -; CHECK-NEXT: cmovbq %rax, %rsi -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: testq %rdx, %rdx -; CHECK-NEXT: cmovsq %rax, %rcx -; CHECK-NEXT: cmoveq %rsi, %rcx -; CHECK-NEXT: cmovsq %rdx, %rdi -; CHECK-NEXT: testq %rdi, %rdi -; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; CHECK-NEXT: movq %rax, %rdx -; CHECK-NEXT: cmovnsq %rcx, %rdx -; CHECK-NEXT: cmpq %rax, %rcx ; CHECK-NEXT: cmovbeq %rax, %rcx -; CHECK-NEXT: cmpq $-1, %rdi -; CHECK-NEXT: cmovneq %rdx, %rcx -; CHECK-NEXT: movq %rcx, %rax -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: ucomisd %xmm0, %xmm0 +; CHECK-NEXT: cmovnpq %rcx, %rax ; CHECK-NEXT: retq entry: %conv = fptosi double %x to i128 @@ -1157,29 
+1095,13 @@ define i64 @stest_f32i64_mm(float %x) { ; CHECK-LABEL: stest_f32i64_mm: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: pushq %rax -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: callq __fixsfti@PLT +; CHECK-NEXT: cvttss2si %xmm0, %rax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF -; CHECK-NEXT: cmpq %rcx, %rax -; CHECK-NEXT: movq %rcx, %rsi -; CHECK-NEXT: cmovbq %rax, %rsi -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: testq %rdx, %rdx -; CHECK-NEXT: cmovsq %rax, %rcx -; CHECK-NEXT: cmoveq %rsi, %rcx -; CHECK-NEXT: cmovsq %rdx, %rdi -; CHECK-NEXT: testq %rdi, %rdi -; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; CHECK-NEXT: movq %rax, %rdx -; CHECK-NEXT: cmovnsq %rcx, %rdx -; CHECK-NEXT: cmpq %rax, %rcx ; CHECK-NEXT: cmovbeq %rax, %rcx -; CHECK-NEXT: cmpq $-1, %rdi -; CHECK-NEXT: cmovneq %rdx, %rcx -; CHECK-NEXT: movq %rcx, %rax -; CHECK-NEXT: popq %rcx -; CHECK-NEXT: .cfi_def_cfa_offset 8 +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: cmovnpq %rcx, %rax ; CHECK-NEXT: retq entry: %conv = fptosi float %x to i128 @@ -1243,25 +1165,13 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: movzwl %di, %edi ; CHECK-NEXT: callq __gnu_h2f_ieee@PLT -; CHECK-NEXT: callq __fixsfti@PLT +; CHECK-NEXT: cvttss2si %xmm0, %rax +; CHECK-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; CHECK-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF -; CHECK-NEXT: cmpq %rcx, %rax -; CHECK-NEXT: movq %rcx, %rsi -; CHECK-NEXT: cmovbq %rax, %rsi -; CHECK-NEXT: xorl %edi, %edi -; CHECK-NEXT: testq %rdx, %rdx -; CHECK-NEXT: cmovsq %rax, %rcx -; CHECK-NEXT: cmoveq %rsi, %rcx -; CHECK-NEXT: cmovsq %rdx, %rdi -; CHECK-NEXT: testq %rdi, %rdi -; CHECK-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; CHECK-NEXT: movq %rax, %rdx -; CHECK-NEXT: cmovnsq %rcx, %rdx -; CHECK-NEXT: cmpq %rax, %rcx 
; CHECK-NEXT: cmovbeq %rax, %rcx -; CHECK-NEXT: cmpq $-1, %rdi -; CHECK-NEXT: cmovneq %rdx, %rcx -; CHECK-NEXT: movq %rcx, %rax +; CHECK-NEXT: xorl %eax, %eax +; CHECK-NEXT: ucomiss %xmm0, %xmm0 +; CHECK-NEXT: cmovnpq %rcx, %rax ; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq