diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -63,9 +63,14 @@ /// unsigned integers and single-precision outputs. FCFIDU, FCFIDS, FCFIDUS, + /// FCTI[D,W] - The FCTID and FCTIW instructions, taking an f32 or f64 + /// operand, producing an f64 value containing the integer representation + /// of that FP value, rounded using the current rounding mode. + FCTID, FCTIW, + /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 /// operand, producing an f64 value containing the integer representation - /// of that FP value. + /// of that FP value, rounding it toward zero. FCTIDZ, FCTIWZ, /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for @@ -996,6 +1001,8 @@ void spliceIntoChain(SDValue ResChain, SDValue NewResChain, SelectionDAG &DAG) const; + SDValue getPPCISDNodeForFP_TO_INT(SDValue Op, SelectionDAG &DAG, + const SDLoc &dl) const; void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, SelectionDAG &DAG, const SDLoc &dl) const; SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG, diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -528,12 +528,16 @@ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::LLRINT, MVT::f32, Custom); + setOperationAction(ISD::LLRINT, MVT::f64, Custom); } setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); + setOperationAction(ISD::LRINT, MVT::f32, Custom); + 
setOperationAction(ISD::LRINT, MVT::f64, Custom); } if (Subtarget.use64BitRegs()) { @@ -1320,6 +1324,8 @@ case PPCISD::FCFIDU: return "PPCISD::FCFIDU"; case PPCISD::FCFIDS: return "PPCISD::FCFIDS"; case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS"; + case PPCISD::FCTID: return "PPCISD::FCTID"; + case PPCISD::FCTIW: return "PPCISD::FCTIW"; case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ"; case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ"; case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ"; @@ -7509,32 +7515,59 @@ return Op; } +/// Select and build the PPCISD FP-to-integer conversion node for \p Op, which +/// must be an FP_TO_SINT, FP_TO_UINT, LRINT or LLRINT node. An f32 source is +/// extended to f64 first, and the result is always produced as an f64; callers +/// still have to move or store it to obtain the integer value. +SDValue PPCTargetLowering::getPPCISDNodeForFP_TO_INT(SDValue Op, + SelectionDAG &DAG, + const SDLoc &dl) const { + assert(Op.getOperand(0).getValueType().isFloatingPoint()); + SDValue Src = Op.getOperand(0); + + if (Src.getValueType() == MVT::f32) + Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); + + MVT::SimpleValueType Ty = Op.getSimpleValueType().SimpleTy; + assert((Ty == MVT::i32 || Ty == MVT::i64) && + "Result type must be a 32- or 64-bit integer."); + + unsigned Opc = Op.getOpcode(); + unsigned PPCOpc; + + switch (Opc) { + default: + llvm_unreachable("Got invalid ISD node in FP_TO_INT custom expander!"); + case ISD::FP_TO_SINT: + PPCOpc = Ty == MVT::i32 ? PPCISD::FCTIWZ : PPCISD::FCTIDZ; + break; + case ISD::FP_TO_UINT: + if (Subtarget.hasFPCVT()) + PPCOpc = Ty == MVT::i32 ? PPCISD::FCTIWUZ : PPCISD::FCTIDUZ; + else if (Ty == MVT::i32) + PPCOpc = PPCISD::FCTIDZ; + else + llvm_unreachable("i64 FP_TO_UINT is supported only with FPCVT"); + break; + case ISD::LRINT: + assert(Subtarget.hasFPCVT() && + "Custom expander for LRINT only supported with FPCVT"); + PPCOpc = Ty == MVT::i32 ? 
PPCISD::FCTIW : PPCISD::FCTID; + break; + case ISD::LLRINT: + assert(Subtarget.hasFPCVT() && Subtarget.has64BitSupport() && + "Custom expander for LLRINT only supported on 64-bit with FPCVT"); + PPCOpc = PPCISD::FCTID; + break; + } + + return DAG.getNode(PPCOpc, dl, MVT::f64, Src); +} + void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI, SelectionDAG &DAG, const SDLoc &dl) const { - assert(Op.getOperand(0).getValueType().isFloatingPoint()); - SDValue Src = Op.getOperand(0); - if (Src.getValueType() == MVT::f32) - Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); - - SDValue Tmp; - switch (Op.getSimpleValueType().SimpleTy) { - default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); - case MVT::i32: - Tmp = DAG.getNode( - Op.getOpcode() == ISD::FP_TO_SINT - ? PPCISD::FCTIWZ - : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), - dl, MVT::f64, Src); - break; - case MVT::i64: - assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && - "i64 FP_TO_UINT is supported only with FPCVT"); - Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : - PPCISD::FCTIDUZ, - dl, MVT::f64, Src); - break; - } + SDValue Tmp = getPPCISDNodeForFP_TO_INT(Op, DAG, dl); // Convert the FP value to an int value through memory. bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() && @@ -7575,33 +7608,8 @@ SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) const { - assert(Op.getOperand(0).getValueType().isFloatingPoint()); - SDValue Src = Op.getOperand(0); - - if (Src.getValueType() == MVT::f32) - Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src); - - SDValue Tmp; - switch (Op.getSimpleValueType().SimpleTy) { - default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); - case MVT::i32: - Tmp = DAG.getNode( - Op.getOpcode() == ISD::FP_TO_SINT - ? PPCISD::FCTIWZ - : (Subtarget.hasFPCVT() ? 
PPCISD::FCTIWUZ : PPCISD::FCTIDZ), - dl, MVT::f64, Src); - Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i32, Tmp); - break; - case MVT::i64: - assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && - "i64 FP_TO_UINT is supported only with FPCVT"); - Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : - PPCISD::FCTIDUZ, - dl, MVT::f64, Src); - Tmp = DAG.getNode(PPCISD::MFVSR, dl, MVT::i64, Tmp); - break; - } - return Tmp; + SDValue Tmp = getPPCISDNodeForFP_TO_INT(Op, DAG, dl); + return DAG.getNode(PPCISD::MFVSR, dl, Op.getSimpleValueType().SimpleTy, Tmp); } SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, @@ -10307,6 +10315,8 @@ case ISD::STORE: return LowerSTORE(Op, DAG); case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); + case ISD::LRINT: + case ISD::LLRINT: case ISD::FP_TO_UINT: case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op)); case ISD::UINT_TO_FP: diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -1316,7 +1316,7 @@ [(set f64:$frD, (PPCfcfid f64:$frB))]>, isPPC64; defm FCTID : XForm_26r<63, 814, (outs f8rc:$frD), (ins f8rc:$frB), "fctid", "$frD, $frB", IIC_FPGeneral, - []>, isPPC64; + [(set f64:$frD, (PPCfctid f64:$frB))]>, isPPC64; defm FCTIDU : XForm_26r<63, 942, (outs f8rc:$frD), (ins f8rc:$frB), "fctidu", "$frD, $frB", IIC_FPGeneral, []>, isPPC64; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -132,6 +132,8 @@ def PPCfcfidu : SDNode<"PPCISD::FCFIDU", SDTFPUnaryOp, []>; def PPCfcfids : SDNode<"PPCISD::FCFIDS", SDTFPRoundOp, []>; def PPCfcfidus: SDNode<"PPCISD::FCFIDUS", SDTFPRoundOp, []>; +def PPCfctid : SDNode<"PPCISD::FCTID", SDTFPUnaryOp, []>; +def 
PPCfctiw : SDNode<"PPCISD::FCTIW", SDTFPUnaryOp, []>; def PPCfctidz : SDNode<"PPCISD::FCTIDZ", SDTFPUnaryOp, []>; def PPCfctiwz : SDNode<"PPCISD::FCTIWZ", SDTFPUnaryOp, []>; def PPCfctiduz: SDNode<"PPCISD::FCTIDUZ",SDTFPUnaryOp, []>; @@ -2462,7 +2464,7 @@ let hasSideEffects = 0 in { defm FCTIW : XForm_26r<63, 14, (outs f8rc:$frD), (ins f8rc:$frB), "fctiw", "$frD, $frB", IIC_FPGeneral, - []>; + [(set f64:$frD, (PPCfctiw f64:$frB))]>; defm FCTIWU : XForm_26r<63, 142, (outs f8rc:$frD), (ins f8rc:$frB), "fctiwu", "$frD, $frB", IIC_FPGeneral, []>; diff --git a/llvm/test/CodeGen/PowerPC/llrint-conv.ll b/llvm/test/CodeGen/PowerPC/llrint-conv.ll --- a/llvm/test/CodeGen/PowerPC/llrint-conv.ll +++ b/llvm/test/CodeGen/PowerPC/llrint-conv.ll @@ -1,7 +1,12 @@ -; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64 -mattr=+fpcvt < %s \ +; RUN: | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le -mattr=-fpcvt < %s \ +; RUN: | FileCheck --check-prefix=CHECK-NOFPCVT %s ; CHECK-LABEL: testmsws: -; CHECK: bl llrintf +; CHECK: fctid 0, 1 +; CHECK-NOFPCVT: bl llrintf define signext i32 @testmsws(float %x) { entry: %0 = tail call i64 @llvm.llrint.f32(float %x) @@ -10,7 +15,8 @@ } ; CHECK-LABEL: testmsxs: -; CHECK: bl llrintf +; CHECK: fctid 0, 1 +; CHECK-NOFPCVT: bl llrintf define i64 @testmsxs(float %x) { entry: %0 = tail call i64 @llvm.llrint.f32(float %x) @@ -18,7 +24,8 @@ } ; CHECK-LABEL: testmswd: -; CHECK: bl llrint +; CHECK: fctid 0, 1 +; CHECK-NOFPCVT: bl llrint define signext i32 @testmswd(double %x) { entry: %0 = tail call i64 @llvm.llrint.f64(double %x) @@ -27,7 +34,8 @@ } ; CHECK-LABEL: testmsxd: -; CHECK: bl llrint +; CHECK: fctid 0, 1 +; CHECK-NOFPCVT: bl llrint define i64 @testmsxd(double %x) { entry: %0 = tail call i64 @llvm.llrint.f64(double %x) @@ -36,6 +44,7 @@ ; CHECK-LABEL: testmswl: ; CHECK: bl llrintl +; CHECK-NOFPCVT: bl 
llrintl define signext i32 @testmswl(ppc_fp128 %x) { entry: %0 = tail call i64 @llvm.llrint.ppcf128(ppc_fp128 %x) @@ -45,6 +54,7 @@ ; CHECK-LABEL: testmsll: ; CHECK: bl llrintl +; CHECK-NOFPCVT: bl llrintl define i64 @testmsll(ppc_fp128 %x) { entry: %0 = tail call i64 @llvm.llrint.ppcf128(ppc_fp128 %x) diff --git a/llvm/test/CodeGen/PowerPC/lrint-conv.ll b/llvm/test/CodeGen/PowerPC/lrint-conv.ll --- a/llvm/test/CodeGen/PowerPC/lrint-conv.ll +++ b/llvm/test/CodeGen/PowerPC/lrint-conv.ll @@ -1,7 +1,12 @@ -; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le < %s | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64 -mattr=+fpcvt < %s \ +; RUN: | FileCheck %s +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le -mattr=-fpcvt < %s \ +; RUN: | FileCheck --check-prefix=CHECK-NOFPCVT %s ; CHECK-LABEL: testmsws: -; CHECK: bl lrintf +; CHECK: fctid 0, 1 +; CHECK-NOFPCVT: bl lrintf define signext i32 @testmsws(float %x) { entry: %0 = tail call i64 @llvm.lrint.i64.f32(float %x) @@ -10,7 +15,8 @@ } ; CHECK-LABEL: testmsxs: -; CHECK: bl lrintf +; CHECK: fctid 0, 1 +; CHECK-NOFPCVT: bl lrintf define i64 @testmsxs(float %x) { entry: %0 = tail call i64 @llvm.lrint.i64.f32(float %x) @@ -18,7 +24,8 @@ } ; CHECK-LABEL: testmswd: -; CHECK: bl lrint +; CHECK: fctid 0, 1 +; CHECK-NOFPCVT: bl lrint define signext i32 @testmswd(double %x) { entry: %0 = tail call i64 @llvm.lrint.i64.f64(double %x) @@ -27,7 +34,8 @@ } ; CHECK-LABEL: testmsxd: -; CHECK: bl lrint +; CHECK: fctid 0, 1 +; CHECK-NOFPCVT: bl lrint define i64 @testmsxd(double %x) { entry: %0 = tail call i64 @llvm.lrint.i64.f64(double %x) @@ -36,6 +44,7 @@ ; CHECK-LABEL: testmswl: ; CHECK: bl lrintl +; CHECK-NOFPCVT: bl lrintl define signext i32 @testmswl(ppc_fp128 %x) { entry: %0 = tail call i64 @llvm.lrint.i64.ppcf128(ppc_fp128 %x) @@ -45,6 +54,7 @@ ; CHECK-LABEL: testmsll: ; CHECK: bl lrintl +; CHECK-NOFPCVT: bl lrintl define i64 @testmsll(ppc_fp128 %x) { 
entry: %0 = tail call i64 @llvm.lrint.i64.ppcf128(ppc_fp128 %x) diff --git a/llvm/test/CodeGen/PowerPC/pr43527.ll b/llvm/test/CodeGen/PowerPC/pr43527.ll --- a/llvm/test/CodeGen/PowerPC/pr43527.ll +++ b/llvm/test/CodeGen/PowerPC/pr43527.ll @@ -9,34 +9,20 @@ ; CHECK-NEXT: # %bb.1: # %bb3 ; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB0_6 ; CHECK-NEXT: # %bb.2: # %bb4 -; CHECK-NEXT: mflr r0 -; CHECK-NEXT: .cfi_def_cfa_offset 64 -; CHECK-NEXT: .cfi_offset lr, 16 -; CHECK-NEXT: .cfi_offset r29, -24 -; CHECK-NEXT: .cfi_offset r30, -16 -; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; CHECK-NEXT: std r0, 16(r1) -; CHECK-NEXT: stdu r1, -64(r1) -; CHECK-NEXT: sub r30, r4, r3 -; CHECK-NEXT: li r29, 0 -; CHECK-NEXT: .p2align 5 +; CHECK-NEXT: sub r3, r4, r3 +; CHECK-NEXT: mtctr r3 +; CHECK-NEXT: li r3, 0 +; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_3: # %bb5 ; CHECK-NEXT: # -; CHECK-NEXT: lfsx f1, 0, r29 -; CHECK-NEXT: bl lrint -; CHECK-NEXT: nop -; CHECK-NEXT: addi r30, r30, -1 -; CHECK-NEXT: addi r29, r29, 4 -; CHECK-NEXT: cmpldi r30, 0 -; CHECK-NEXT: bne cr0, .LBB0_3 +; CHECK-NEXT: mr r4, r3 +; CHECK-NEXT: addi r3, r3, 4 +; CHECK-NEXT: bdnz .LBB0_3 ; CHECK-NEXT: # %bb.4: # %bb15 +; CHECK-NEXT: lfsx f0, 0, r4 +; CHECK-NEXT: fctid f0, f0 +; CHECK-NEXT: mffprd r3, f0 ; CHECK-NEXT: stb r3, 0(r3) -; CHECK-NEXT: addi r1, r1, 64 -; CHECK-NEXT: ld r0, 16(r1) -; CHECK-NEXT: mtlr r0 -; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; CHECK-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; CHECK-NEXT: blr ; CHECK-NEXT: .LBB0_5: # %bb2 ; CHECK-NEXT: .LBB0_6: # %bb14