diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -452,6 +452,11 @@ /// PLD. MAT_PCREL_ADDR, + /// Constrained direct move from VSR instruction. This strict opcode is used + /// to keep operands consistent after custom lowering, since strict nodes + /// have an extra operand for chain. + STRICT_MFVSR = ISD::FIRST_TARGET_STRICTFP_OPCODE, + /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a /// byte-swapping store instruction. It byte-swaps the low "Type" bits of /// the GPRC input, then stores it through Ptr. Type can be either i16 or diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -431,6 +431,7 @@ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); } else { // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores. + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); // PowerPC does not have [U|S]INT_TO_FP @@ -563,12 +564,15 @@ if (Subtarget.has64BitSupport()) { // They also have instructions for converting between i64 and fp. + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Expand); setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); // This is just the low 32 bits of a (signed) fp->i64 conversion. // We cannot do this with Promote because i64 is not a legal type. 
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) @@ -578,19 +582,25 @@ if (Subtarget.hasSPE()) { setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); - } else + } else { + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); + } } // With the instructions enabled under FPCVT, we can do everything. if (Subtarget.hasFPCVT()) { if (Subtarget.has64BitSupport()) { + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); } + setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); + setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); @@ -1576,6 +1586,8 @@ case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR"; case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT"; case PPCISD::FNMSUB: return "PPCISD::FNMSUB"; + case PPCISD::STRICT_MFVSR: + return "PPCISD::STRICT_MFVSR"; } return nullptr; } @@ -8206,11 +8218,19 @@ static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) { SDLoc dl(Op); - bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT; - SDValue Src = Op.getOperand(0); + bool IsStrict = Op->isStrictFPOpcode(); + bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT || + Op.getOpcode() == ISD::STRICT_FP_TO_SINT; + // For strict nodes, source is the second operand. + SDValue Src = Op.getOperand(IsStrict ? 
1 : 0); assert(Src.getValueType().isFloatingPoint()); - if (Src.getValueType() == MVT::f32) - Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); + if (Src.getValueType() == MVT::f32) { + if (IsStrict) + Src = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f64, MVT::Other}, + {Op.getOperand(0), Src}); + else + Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); + } SDValue Conv; switch (Op.getSimpleValueType().SimpleTy) { default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); @@ -8233,7 +8253,8 @@ SelectionDAG &DAG, const SDLoc &dl) const { SDValue Tmp = convertFPToInt(Op, DAG, Subtarget); - bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT; + bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT || + Op.getOpcode() == ISD::STRICT_FP_TO_SINT; // Convert the FP value to an int value through memory. bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() && @@ -8277,14 +8298,18 @@ SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const { - assert(Op.getOperand(0).getValueType().isFloatingPoint()); - return DAG.getNode(PPCISD::MFVSR, dl, Op.getSimpleValueType().SimpleTy, - convertFPToInt(Op, DAG, Subtarget)); + if (Op->isStrictFPOpcode()) + return DAG.getNode(PPCISD::STRICT_MFVSR, dl, + {Op.getValueType(), MVT::Other}, + {Op.getOperand(0), convertFPToInt(Op, DAG, Subtarget)}); + else + return DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), + convertFPToInt(Op, DAG, Subtarget)); } SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const { - SDValue Src = Op.getOperand(0); + SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0); // FP to INT conversions are legal for f128. 
if (Src.getValueType() == MVT::f128) return Op; @@ -11121,6 +11146,8 @@ case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::STRICT_FP_TO_UINT: + case ISD::STRICT_FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op)); case ISD::UINT_TO_FP: @@ -11209,10 +11236,16 @@ } return; } + case ISD::STRICT_FP_TO_SINT: + case ISD::STRICT_FP_TO_UINT: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: // LowerFP_TO_INT() can only handle f32 and f64. - if (N->getOperand(0).getValueType() == MVT::ppcf128) + if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() == + MVT::ppcf128) return; Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); + // A strict node also defines a chain; return one value per result of N. + if (N->isStrictFPOpcode()) + Results.push_back(Results.back().getValue(1)); return; diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td --- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td +++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td @@ -140,6 +140,8 @@ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; def PPCSToV : SDNode<"PPCISD::SCALAR_TO_VECTOR_PERMUTED", SDTypeProfile<1, 1, []>, []>; +def PPCstrict_mfvsr : SDNode<"PPCISD::STRICT_MFVSR", SDTUnaryOp, + [SDNPHasChain]>; //-------------------------- Predicate definitions ---------------------------// def HasVSX : Predicate<"Subtarget->hasVSX()">; @@ -3326,6 +3328,12 @@ def : Pat<(i64 (llround f32:$S)), (i64 (MFVSRD (FCTID (XSRDPI (COPY_TO_REGCLASS $S, VSFRC)))))>; +// PPC-specific direct move operators. +def : Pat<(i64 (PPCstrict_mfvsr f64:$A)), + (i64 (MFVSRD f64:$A))>; +def : Pat<(i32 (PPCstrict_mfvsr f64:$A)), + (i32 (MFVSRWZ f64:$A))>; + // Alternate patterns for PPCmtvsrz where the output is v8i16 or v16i8 instead // of f64 def : Pat<(v8i16 (PPCmtvsrz i32:$A)), @@ -3761,11 +3769,11 @@ (f128 (XSCVUDQP (LXSIBZX xoaddr:$src)))>; // Truncate & Convert QP -> (Un)Signed (D)Word.
-def : Pat<(i64 (fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>; -def : Pat<(i64 (fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>; -def : Pat<(i32 (fp_to_sint f128:$src)), +def : Pat<(i64 (any_fp_to_sint f128:$src)), (i64 (MFVRD (XSCVQPSDZ $src)))>; +def : Pat<(i64 (any_fp_to_uint f128:$src)), (i64 (MFVRD (XSCVQPUDZ $src)))>; +def : Pat<(i32 (any_fp_to_sint f128:$src)), (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPSWZ $src), VFRC)))>; -def : Pat<(i32 (fp_to_uint f128:$src)), +def : Pat<(i32 (any_fp_to_uint f128:$src)), (i32 (MFVSRWZ (COPY_TO_REGCLASS (XSCVQPUWZ $src), VFRC)))>; // Instructions for store(fptosi). diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv-f128.ll @@ -0,0 +1,566 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s \ +; RUN: -check-prefix=P8 +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s \ +; RUN: -check-prefix=P9 +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -mattr=-vsx \ +; RUN: | FileCheck %s -check-prefix=NOVSX + +declare i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128, metadata) +declare i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128, metadata) +declare i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128, metadata) + +declare i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128, metadata) +declare i64 @llvm.experimental.constrained.fptosi.i64.ppcf128(ppc_fp128, metadata) +declare i64
@llvm.experimental.constrained.fptoui.i64.ppcf128(ppc_fp128, metadata) +declare i32 @llvm.experimental.constrained.fptoui.i32.ppcf128(ppc_fp128, metadata) + +declare i128 @llvm.experimental.constrained.fptosi.i128.ppcf128(ppc_fp128, metadata) +declare i128 @llvm.experimental.constrained.fptoui.i128.ppcf128(ppc_fp128, metadata) +declare i128 @llvm.experimental.constrained.fptosi.i128.f128(fp128, metadata) +declare i128 @llvm.experimental.constrained.fptoui.i128.f128(fp128, metadata) + +define i128 @q_to_i128(fp128 %m) #0 { +; P8-LABEL: q_to_i128: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixtfti +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: q_to_i128: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixtfti +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: q_to_i128: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixtfti +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call i128 @llvm.experimental.constrained.fptosi.i128.f128(fp128 %m, metadata !"fpexcept.strict") #0 + ret i128 %conv +} + +define i128 @q_to_u128(fp128 %m) #0 { +; P8-LABEL: q_to_u128: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixunstfti +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; 
P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: q_to_u128: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixunstfti +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: q_to_u128: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixunstfti +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call i128 @llvm.experimental.constrained.fptoui.i128.f128(fp128 %m, metadata !"fpexcept.strict") #0 + ret i128 %conv +} + +define i128 @ppcq_to_i128(ppc_fp128 %m) #0 { +; P8-LABEL: ppcq_to_i128: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixtfti +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: ppcq_to_i128: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixtfti +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: ppcq_to_i128: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixtfti +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call 
i128 @llvm.experimental.constrained.fptosi.i128.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0 + ret i128 %conv +} + +define i128 @ppcq_to_u128(ppc_fp128 %m) #0 { +; P8-LABEL: ppcq_to_u128: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixunstfti +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: ppcq_to_u128: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixunstfti +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: ppcq_to_u128: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixunstfti +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call i128 @llvm.experimental.constrained.fptoui.i128.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0 + ret i128 %conv +} + +define signext i32 @q_to_i32(fp128 %m) #0 { +; P8-LABEL: q_to_i32: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixkfsi +; P8-NEXT: nop +; P8-NEXT: extsw r3, r3 +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: q_to_i32: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvqpswz v2, v2 +; P9-NEXT: mfvsrwz r3, v2 +; P9-NEXT: extsw r3, r3 +; P9-NEXT: blr +; +; NOVSX-LABEL: q_to_i32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +;
NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixkfsi +; NOVSX-NEXT: nop +; NOVSX-NEXT: extsw r3, r3 +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128 %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +define i64 @q_to_i64(fp128 %m) #0 { +; P8-LABEL: q_to_i64: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixkfdi +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: q_to_i64: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvqpsdz v2, v2 +; P9-NEXT: mfvsrd r3, v2 +; P9-NEXT: blr +; +; NOVSX-LABEL: q_to_i64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixkfdi +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call i64 @llvm.experimental.constrained.fptosi.i64.f128(fp128 %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define i64 @q_to_u64(fp128 %m) #0 { +; P8-LABEL: q_to_u64: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixunskfdi +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: q_to_u64: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvqpudz v2, v2 +; P9-NEXT: mfvsrd r3, v2 +; P9-NEXT: blr +; +; NOVSX-LABEL: q_to_u64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 
16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixunskfdi +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128 %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define zeroext i32 @q_to_u32(fp128 %m) #0 { +; P8-LABEL: q_to_u32: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixunskfsi +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: q_to_u32: +; P9: # %bb.0: # %entry +; P9-NEXT: xscvqpuwz v2, v2 +; P9-NEXT: mfvsrwz r3, v2 +; P9-NEXT: clrldi r3, r3, 32 +; P9-NEXT: blr +; +; NOVSX-LABEL: q_to_u32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixunskfsi +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call i32 @llvm.experimental.constrained.fptoui.i32.f128(fp128 %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +define signext i32 @ppcq_to_i32(ppc_fp128 %m) #0 { +; P8-LABEL: ppcq_to_i32: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __gcc_qtou +; P8-NEXT: nop +; P8-NEXT: extsw r3, r3 +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: ppcq_to_i32: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: 
.cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __gcc_qtou +; P9-NEXT: nop +; P9-NEXT: extsw r3, r3 +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: ppcq_to_i32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __gcc_qtou +; NOVSX-NEXT: nop +; NOVSX-NEXT: extsw r3, r3 +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +define i64 @ppcq_to_i64(ppc_fp128 %m) #0 { +; P8-LABEL: ppcq_to_i64: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixtfdi +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: ppcq_to_i64: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixtfdi +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: ppcq_to_i64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixtfdi +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call i64 @llvm.experimental.constrained.fptosi.i64.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define i64 
@ppcq_to_u64(ppc_fp128 %m) #0 { +; P8-LABEL: ppcq_to_u64: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixunstfdi +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: ppcq_to_u64: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixunstfdi +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: ppcq_to_u64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixunstfdi +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call i64 @llvm.experimental.constrained.fptoui.i64.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0 + ret i64 %conv +} + +define zeroext i32 @ppcq_to_u32(ppc_fp128 %m) #0 { +; P8-LABEL: ppcq_to_u32: +; P8: # %bb.0: # %entry +; P8-NEXT: mflr r0 +; P8-NEXT: std r0, 16(r1) +; P8-NEXT: stdu r1, -112(r1) +; P8-NEXT: .cfi_def_cfa_offset 112 +; P8-NEXT: .cfi_offset lr, 16 +; P8-NEXT: bl __fixunstfsi +; P8-NEXT: nop +; P8-NEXT: addi r1, r1, 112 +; P8-NEXT: ld r0, 16(r1) +; P8-NEXT: mtlr r0 +; P8-NEXT: blr +; +; P9-LABEL: ppcq_to_u32: +; P9: # %bb.0: # %entry +; P9-NEXT: mflr r0 +; P9-NEXT: std r0, 16(r1) +; P9-NEXT: stdu r1, -32(r1) +; P9-NEXT: .cfi_def_cfa_offset 32 +; P9-NEXT: .cfi_offset lr, 16 +; P9-NEXT: bl __fixunstfsi +; P9-NEXT: nop +; P9-NEXT: addi r1, r1, 32 +; P9-NEXT: ld r0, 16(r1) +; P9-NEXT: mtlr r0 +; P9-NEXT: blr +; +; NOVSX-LABEL: ppcq_to_u32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: 
mflr r0 +; NOVSX-NEXT: std r0, 16(r1) +; NOVSX-NEXT: stdu r1, -32(r1) +; NOVSX-NEXT: .cfi_def_cfa_offset 32 +; NOVSX-NEXT: .cfi_offset lr, 16 +; NOVSX-NEXT: bl __fixunstfsi +; NOVSX-NEXT: nop +; NOVSX-NEXT: addi r1, r1, 32 +; NOVSX-NEXT: ld r0, 16(r1) +; NOVSX-NEXT: mtlr r0 +; NOVSX-NEXT: blr +entry: + %conv = tail call i32 @llvm.experimental.constrained.fptoui.i32.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0 + ret i32 %conv +} + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll --- a/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll @@ -2,6 +2,13 @@ ; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names < %s -mcpu=e500 \ ; RUN: -mtriple=powerpc-unknown-linux-gnu -mattr=spe | FileCheck %s \ ; RUN: -check-prefix=SPE +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s +; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ +; RUN: < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -mattr=-vsx | \ +; RUN: FileCheck %s -check-prefix=NOVSX declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata) declare i64 @llvm.experimental.constrained.fptosi.i64.f64(double, metadata) @@ -29,6 +36,20 @@ ; SPE-NEXT: evmergelo r3, r3, r4 ; SPE-NEXT: efdctsiz r3, r3 ; SPE-NEXT: blr +; +; CHECK-LABEL: d_to_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpsxws f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: d_to_i32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiwz f0, f1 +; NOVSX-NEXT: addi r3, r1, -4 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: lwz r3, -4(r1) +; NOVSX-NEXT: blr entry: %conv = call i32 
@llvm.experimental.constrained.fptosi.i32.f64(double %m, metadata !"fpexcept.strict") #0 ret i32 %conv @@ -51,6 +72,19 @@ ; SPE-NEXT: addi r1, r1, 16 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr +; +; CHECK-LABEL: d_to_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpsxds f0, f1 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: d_to_i64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctidz f0, f1 +; NOVSX-NEXT: stfd f0, -8(r1) +; NOVSX-NEXT: ld r3, -8(r1) +; NOVSX-NEXT: blr entry: %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %m, metadata !"fpexcept.strict") #0 ret i64 %conv @@ -73,6 +107,19 @@ ; SPE-NEXT: addi r1, r1, 16 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr +; +; CHECK-LABEL: d_to_u64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpuxds f0, f1 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: d_to_u64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiduz f0, f1 +; NOVSX-NEXT: stfd f0, -8(r1) +; NOVSX-NEXT: ld r3, -8(r1) +; NOVSX-NEXT: blr entry: %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %m, metadata !"fpexcept.strict") #0 ret i64 %conv @@ -84,6 +131,21 @@ ; SPE-NEXT: evmergelo r3, r3, r4 ; SPE-NEXT: efdctuiz r3, r3 ; SPE-NEXT: blr +; +; CHECK-LABEL: d_to_u32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpuxws f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: d_to_u32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiwuz f0, f1 +; NOVSX-NEXT: addi r3, r1, -4 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: lwz r3, -4(r1) +; NOVSX-NEXT: blr entry: %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %m, metadata !"fpexcept.strict") #0 ret i32 %conv @@ -94,6 +156,21 @@ ; SPE: # %bb.0: # %entry ; SPE-NEXT: efsctsiz r3, r3 ; SPE-NEXT: blr +; +; CHECK-LABEL: f_to_i32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpsxws f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: extsw r3, r3 +; CHECK-NEXT: blr +; +; 
NOVSX-LABEL: f_to_i32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiwz f0, f1 +; NOVSX-NEXT: addi r3, r1, -4 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: lwa r3, -4(r1) +; NOVSX-NEXT: blr entry: %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %m, metadata !"fpexcept.strict") #0 ret i32 %conv @@ -112,6 +189,19 @@ ; SPE-NEXT: addi r1, r1, 16 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr +; +; CHECK-LABEL: f_to_i64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpsxds f0, f1 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: f_to_i64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctidz f0, f1 +; NOVSX-NEXT: stfd f0, -8(r1) +; NOVSX-NEXT: ld r3, -8(r1) +; NOVSX-NEXT: blr entry: %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %m, metadata !"fpexcept.strict") #0 ret i64 %conv @@ -130,6 +220,19 @@ ; SPE-NEXT: addi r1, r1, 16 ; SPE-NEXT: mtlr r0 ; SPE-NEXT: blr +; +; CHECK-LABEL: f_to_u64: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpuxds f0, f1 +; CHECK-NEXT: mffprd r3, f0 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: f_to_u64: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiduz f0, f1 +; NOVSX-NEXT: stfd f0, -8(r1) +; NOVSX-NEXT: ld r3, -8(r1) +; NOVSX-NEXT: blr entry: %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %m, metadata !"fpexcept.strict") #0 ret i64 %conv @@ -140,6 +243,21 @@ ; SPE: # %bb.0: # %entry ; SPE-NEXT: efsctuiz r3, r3 ; SPE-NEXT: blr +; +; CHECK-LABEL: f_to_u32: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpuxws f0, f1 +; CHECK-NEXT: mffprwz r3, f0 +; CHECK-NEXT: clrldi r3, r3, 32 +; CHECK-NEXT: blr +; +; NOVSX-LABEL: f_to_u32: +; NOVSX: # %bb.0: # %entry +; NOVSX-NEXT: fctiwuz f0, f1 +; NOVSX-NEXT: addi r3, r1, -4 +; NOVSX-NEXT: stfiwx f0, 0, r3 +; NOVSX-NEXT: lwz r3, -4(r1) +; NOVSX-NEXT: blr entry: %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %m, metadata !"fpexcept.strict") #0 ret i32 %conv