diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -439,6 +439,11 @@ /// PLD. MAT_PCREL_ADDR, + /// Constrained direct move from VSR instruction. This strict opcode is used + /// to keep operands consistent after custom lowering, since strict nodes + /// have an extra operand for chain. + STRICT_MFVSR = ISD::FIRST_TARGET_STRICTFP_OPCODE, + /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. It byte-swaps the low "Type" bits of /// the GPRC input, then stores it through Ptr. Type can be either i16 or diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -429,6 +429,7 @@ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); } else { // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); // PowerPC does not have [U|S]INT_TO_FP @@ -561,12 +562,15 @@ if (Subtarget.has64BitSupport()) { // They also have instructions for converting between i64 and fp. + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand); setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); // This is just the low 32 bits of a (signed) fp->i64 conversion. // We cannot do this with Promote because i64 is not a legal type. 
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) @@ -575,19 +579,25 @@ // PowerPC does not have FP_TO_UINT on 32-bit implementations. if (Subtarget.hasSPE()) setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); - else + else { + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); + } } // With the instructions enabled under FPCVT, we can do everything. if (Subtarget.hasFPCVT()) { if (Subtarget.has64BitSupport()) { + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); } + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); @@ -1573,6 +1583,8 @@ case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR"; case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT"; case PPCISD::FNMSUB: return "PPCISD::FNMSUB"; + case PPCISD::STRICT_MFVSR: + return "PPCISD::STRICT_MFVSR"; } return nullptr; } @@ -8179,11 +8191,19 @@ static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) { SDLoc dl(Op); - bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT; - SDValue Src = Op.getOperand(0); + bool IsStrict = Op->isStrictFPOpcode(); + bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT || + Op.getOpcode() == ISD::STRICT_FP_TO_SINT; + // For strict nodes, source is the second operand. + SDValue Src = Op.getOperand(IsStrict ? 
1 : 0); assert(Src.getValueType().isFloatingPoint()); - if (Src.getValueType() == MVT::f32) - Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); + if (Src.getValueType() == MVT::f32) { + if (IsStrict) + Src = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f64, MVT::Other}, + {Op.getOperand(0), Src}); + else + Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); + } SDValue Conv; switch (Op.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); @@ -8206,7 +8226,8 @@ SelectionDAG &DAG, const SDLoc &dl) const { SDValue Tmp = convertFPToInt(Op, DAG, Subtarget); - bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT; + bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT || + Op.getOpcode() == ISD::STRICT_FP_TO_SINT; // Convert the FP value to an int value through memory. bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() && @@ -8250,14 +8271,18 @@ SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const { - assert(Op.getOperand(0).getValueType().isFloatingPoint()); - return DAG.getNode(PPCISD::MFVSR, dl, Op.getSimpleValueType().SimpleTy, - convertFPToInt(Op, DAG, Subtarget)); + if (Op->isStrictFPOpcode()) + return DAG.getNode(PPCISD::STRICT_MFVSR, dl, + {Op.getValueType(), MVT::Other}, + {Op.getOperand(0), convertFPToInt(Op, DAG, Subtarget)}); + else + return DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), + convertFPToInt(Op, DAG, Subtarget)); } SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const { - SDValue Src = Op.getOperand(0); + SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0); // FP to INT conversions are legal for f128. 
if (EnableQuadPrecision && (Src.getValueType() == MVT::f128)) return Op; @@ -10970,6 +10995,8 @@ case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::STRICT_FP_TO_UINT: + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op)); case ISD::UINT_TO_FP: @@ -11061,10 +11088,19 @@ } return; } + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: - case ISD::FP_TO_UINT: + case ISD::FP_TO_UINT: { // LowerFP_TO_INT() can only handle f32 and f64. - if (N->getOperand(0).getValueType() == MVT::ppcf128) + if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() == + MVT::ppcf128) return; - Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); + SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl); + Results.push_back(LoweredValue); + // A strict node also produces an output chain; push it as well so that + // there is one replacement for every result value of N. + if (N->isStrictFPOpcode()) + Results.push_back(LoweredValue.getValue(1)); return; + } diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -140,6 +140,8 @@ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def PPCSToV : SDNode<"PPCISD::SCALAR_TO_VECTOR_PERMUTED", SDTypeProfile<1, 1, []>, []>; +def PPCstrict_mfvsr : SDNode<"PPCISD::STRICT_MFVSR", SDTUnaryOp, + [SDNPHasChain]>; //-------------------------- Predicate definitions ---------------------------// def HasVSX : Predicate<"Subtarget->hasVSX()">; @@ -3326,6 +3328,12 @@ def : Pat<(i64 (llround f32:$S)), (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>; +// PPC-specific direct move operators. +def : Pat<(i64 (PPCstrict_mfvsr f64:$A)), + (i64 (MFVSRD f64:$A))>; +def : Pat<(i32 (PPCstrict_mfvsr f64:$A)), + (i32 (MFVSRWZ f64:$A))>; + // Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead // of f64 def : Pat<(v8i16 (PPCmtvsrz i32:$A)), @@ -3761,11 +3769,11 @@ (f128 (XSCVUDQP (LXSIBZX xoaddr:$src)))>; // Truncate & Convert QP -> (Un)Signed (D)Word. 
-def : Pat<(i64 (fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>; -def : Pat<(i64 (fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>; -def : Pat<(i32 (fp_to_sint f128:$src)), +def : Pat<(i64 (any_fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>; +def : Pat<(i64 (any_fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>; +def : Pat<(i32 (any_fp_to_sint f128:$src)), (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC)))>; -def : Pat<(i32 (fp_to_uint f128:$src)), +def : Pat<(i32 (any_fp_to_uint f128:$src)), (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC)))>; // Instructions for store(fptosi). diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll @@ -0,0 +1,566 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64-unknown-linux -enable-ppc-quad-precision \ +; RUN: -mcpu=pwr8 | FileCheck %s -check-prefix=P8 +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64le-unknown-linux -enable-ppc-quad-precision \ +; RUN: -mcpu=pwr9 | FileCheck %s -check-prefix=P9 +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64le-unknown-linux -enable-ppc-quad-precision \ +; RUN: -mcpu=pwr8 -mattr=-vsx | FileCheck %s -check-prefix=NOVSX + +declare i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128, metadata) +declare i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128, metadata) + +declare i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128, metadata) +declare i64 
@llvm.experimental.constrained.fptosi.i64.ppcf128(ppc_fp128, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.ppcf128(ppc_fp128, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.ppcf128(ppc_fp128, metadata) + +declare i128 @llvm.experimental.constrained.fptosi.i128.ppcf128(ppc_fp128, metadata) +declare i128 @llvm.experimental.constrained.fptoui.i128.ppcf128(ppc_fp128, metadata) +declare i128 @llvm.experimental.constrained.fptosi.i128.f128(fp128, metadata) +declare i128 @llvm.experimental.constrained.fptoui.i128.f128(fp128, metadata) + +define i128 @q_to_i128(fp128 %m) #0 { +; P8-LABEL: q_to_i128: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixtfti +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: q_to_i128: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixtfti +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: q_to_i128: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixtfti +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call i128 @llvm.experimental.constrained.fptosi.i128.f128(fp128 %m, metadata !"fpexcept.strict") #0 + ret i128 %conv +} + +define i128 @q_to_u128(fp128 %m) #0 { +; P8-LABEL: q_to_u128: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 
16 +; P8-NEXT: bl __fixunstfti +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: q_to_u128: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixunstfti +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: q_to_u128: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixunstfti +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call i128 @llvm.experimental.constrained.fptoui.i128.f128(fp128 %m, metadata !"fpexcept.strict") #0 + ret i128 %conv +} + +define i128 @ppcq_to_i128(ppc_fp128 %m) #0 { +; P8-LABEL: ppcq_to_i128: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixtfti +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: ppcq_to_i128: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixtfti +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: ppcq_to_i128: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixtfti +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 
16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call i128 @llvm.experimental.constrained.fptosi.i128.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0 + ret i128 %conv +} + +define i128 @ppcq_to_u128(ppc_fp128 %m) #0 { +; P8-LABEL: ppcq_to_u128: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixunstfti +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: ppcq_to_u128: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixunstfti +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: ppcq_to_u128: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixunstfti +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call i128 @llvm.experimental.constrained.fptoui.i128.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0 + ret i128 %conv +} + +define signext i32 @q_to_i32(fp128 %m) #0 { +; P8-LABEL: q_to_i32: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixkfsi +; P8-NEXT: nop +; P8-NEXT: extsw r3, r3 +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: q_to_i32: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvqpswz v2, v2 +; P9-NEXT: mfvsrwz r3, v2 +; P9-NEXT: extsw r3, r3 +; P9-NEXT: blr +; +; NOVSX-LABEL: q_to_i32: +; 
NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixkfsi +; NOVSX-NEXT: nop +; NOVSX-NEXT: extsw r3, r3 +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128 %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +define i64 @q_to_i64(fp128 %m) #0 { +; P8-LABEL: q_to_i64: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixkfdi +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: q_to_i64: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvqpsdz v2, v2 +; P9-NEXT: mfvsrd r3, v2 +; P9-NEXT: blr +; +; NOVSX-LABEL: q_to_i64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixkfdi +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128 %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define i64 @q_to_u64(fp128 %m) #0 { +; P8-LABEL: q_to_u64: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixunskfdi +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: q_to_u64: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvqpudz v2, v2 +; P9-NEXT: mfvsrd r3, v2 +; P9-NEXT: blr +; +; NOVSX-LABEL: 
q_to_u64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixunskfdi +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128 %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define zeroext i32 @q_to_u32(fp128 %m) #0 { +; P8-LABEL: q_to_u32: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixunskfsi +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: q_to_u32: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvqpuwz v2, v2 +; P9-NEXT: mfvsrwz r3, v2 +; P9-NEXT: clrldi r3, r3, 32 +; P9-NEXT: blr +; +; NOVSX-LABEL: q_to_u32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixunskfsi +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128 %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +define signext i32 @ppcq_to_i32(ppc_fp128 %m) #0 { +; P8-LABEL: ppcq_to_i32: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __gcc_qtou +; P8-NEXT: nop +; P8-NEXT: extsw r3, r3 +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: ppcq_to_i32: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr 
r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __gcc_qtou +; P9-NEXT: nop +; P9-NEXT: extsw r3, r3 +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: ppcq_to_i32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __gcc_qtou +; NOVSX-NEXT: nop +; NOVSX-NEXT: extsw r3, r3 +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +define i64 @ppcq_to_i64(ppc_fp128 %m) #0 { +; P8-LABEL: ppcq_to_i64: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixtfdi +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: ppcq_to_i64: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixtfdi +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: ppcq_to_i64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixtfdi +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call i64 @llvm.experimental.constrained.fptosi.i64.ppcf128(ppc_fp128 %m, metadata 
!"fpexcept.strict") #0 + ret i64 %conv +} + +define i64 @ppcq_to_u64(ppc_fp128 %m) #0 { +; P8-LABEL: ppcq_to_u64: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixunstfdi +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: ppcq_to_u64: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixunstfdi +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: ppcq_to_u64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixunstfdi +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call i64 @llvm.experimental.constrained.fptoui.i64.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 { +; P8-LABEL: ppcq_to_u32: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixunstfsi +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: ppcq_to_u32: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixunstfsi +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: 
ppcq_to_u32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixunstfsi +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call i32 @llvm.experimental.constrained.fptoui.i32.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll @@ -0,0 +1,171 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -mattr=-vsx | \ +; RUN: FileCheck %s -check-prefix=NOVSX + +declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) +declare i64 @llvm.experimental.constrained.fptosi.i64.f64(double, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.f64(double, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata) + +declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata) +declare i64 @llvm.experimental.constrained.fptosi.i64.f32(float, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.f32(float, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata) + +define i32 @d_to_i32(double %m) #0 { +; CHECK-LABEL: d_to_i32: 
+; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpsxws f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: d_to_i32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiwz f0, f1 +; NOVSX-NEXT: addi r3, r1, -4 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: lwz r3, -4(r1) +; NOVSX-NEXT: blr +entry: + %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +define i64 @d_to_i64(double %m) #0 { +; CHECK-LABEL: d_to_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpsxds f0, f1 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: d_to_i64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctidz f0, f1 +; NOVSX-NEXT: stfd f0, -8(r1) +; NOVSX-NEXT: ld r3, -8(r1) +; NOVSX-NEXT: blr +entry: + %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define i64 @d_to_u64(double %m) #0 { +; CHECK-LABEL: d_to_u64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpuxds f0, f1 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: d_to_u64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiduz f0, f1 +; NOVSX-NEXT: stfd f0, -8(r1) +; NOVSX-NEXT: ld r3, -8(r1) +; NOVSX-NEXT: blr +entry: + %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define zeroext i32 @d_to_u32(double %m) #0 { +; CHECK-LABEL: d_to_u32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpuxws f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: d_to_u32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiwuz f0, f1 +; NOVSX-NEXT: addi r3, r1, -4 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: lwz r3, -4(r1) +; NOVSX-NEXT: blr +entry: + %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +define signext i32 @f_to_i32(float %m) #0 { +; 
CHECK-LABEL: f_to_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpsxws f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: f_to_i32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiwz f0, f1 +; NOVSX-NEXT: addi r3, r1, -4 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: lwa r3, -4(r1) +; NOVSX-NEXT: blr +entry: + %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +define i64 @f_to_i64(float %m) #0 { +; CHECK-LABEL: f_to_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpsxds f0, f1 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: f_to_i64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctidz f0, f1 +; NOVSX-NEXT: stfd f0, -8(r1) +; NOVSX-NEXT: ld r3, -8(r1) +; NOVSX-NEXT: blr +entry: + %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define i64 @f_to_u64(float %m) #0 { +; CHECK-LABEL: f_to_u64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpuxds f0, f1 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: f_to_u64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiduz f0, f1 +; NOVSX-NEXT: stfd f0, -8(r1) +; NOVSX-NEXT: ld r3, -8(r1) +; NOVSX-NEXT: blr +entry: + %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define zeroext i32 @f_to_u32(float %m) #0 { +; CHECK-LABEL: f_to_u32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpuxws f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: f_to_u32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiwuz f0, f1 +; NOVSX-NEXT: addi r3, r1, -4 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: lwz r3, -4(r1) +; NOVSX-NEXT: blr +entry: + %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + 
+attributes #0 = { strictfp }