Index: lib/Target/PowerPC/P9InstrResources.td =================================================================== --- lib/Target/PowerPC/P9InstrResources.td +++ lib/Target/PowerPC/P9InstrResources.td @@ -531,6 +531,7 @@ (instregex "VEXTRACTU(B|H|W)$"), (instregex "VINSERT(B|H|W|D)$"), MFVSRLD, + MFVRLD, MTVSRWS, VBPERMQ, VCLZLSBB, Index: lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- lib/Target/PowerPC/PPCISelLowering.h +++ lib/Target/PowerPC/PPCISelLowering.h @@ -196,6 +196,9 @@ /// Direct move of 2 consective GPR to a VSX register. BUILD_FP128, + /// Extract bits from a float128 + EXTRACT_FP128, + /// Extract a subvector from signed integer vector and convert to FP. /// It is primarily used to convert a (widened) illegal integer vector /// type to a legal floating point vector type. @@ -1092,6 +1095,7 @@ SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const; /// ConvertSETCCToSubtract - looks at SETCC that compares ints. 
It replaces /// SETCC with integer subtraction when (1) there is a legal way of doing it Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -1072,6 +1072,8 @@ setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); + setTargetDAGCombine(ISD::TRUNCATE); + if (Subtarget.useCRBits()) { setTargetDAGCombine(ISD::TRUNCATE); setTargetDAGCombine(ISD::SETCC); @@ -1351,6 +1353,7 @@ case PPCISD::QBFLT: return "PPCISD::QBFLT"; case PPCISD::QVLFSb: return "PPCISD::QVLFSb"; case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128"; + case PPCISD::EXTRACT_FP128: return "PPCISD::EXTRACT_FP128"; case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI"; } return nullptr; @@ -9650,6 +9653,9 @@ return; Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); return; + case ISD::BITCAST: + // Don't handle bitcast here. + return; } } @@ -12492,6 +12498,7 @@ case ISD::ANY_EXTEND: return DAGCombineExtBoolTrunc(N, DCI); case ISD::TRUNCATE: + return combineTRUNCATE(N, DCI); case ISD::SETCC: case ISD::SELECT_CC: return DAGCombineTruncBoolExt(N, DCI); @@ -14172,6 +14179,65 @@ return SDValue(); } +// Detect TRUNCATE operations on bitcasts of float128 values. +// What we are looking for here is the situation where we extract a subset +// of bits from a 128 bit float. +// This can be of two forms: +// 1) BITCAST of f128 feeding TRUNCATE +// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE +// The reason this is required is because we do not have a legal i128 type +// and so we want to prevent having to store the f128 and then reload part +// of it. +SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N, + DAGCombinerInfo &DCI) const { + // If we are using CRBits then try that first. 
+ if (Subtarget.useCRBits()) { + SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI); + + // Check if CRBits did anything and return that if it did. + if (CRTruncValue) + return CRTruncValue; + } + + SDLoc dl(N); + const SDValue & Op0 = N->getOperand(0); + if (Op0.getValueType() == MVT::i128 && + N->getValueType(0) == MVT::i64) { + + EVT TCVT = MVT::i32; + // BITCAST feeding a TRUNCATE + if (Op0.getNode()->getOpcode() == ISD::BITCAST && + Op0.getNode()->getOperand(0).getValueType() == MVT::f128) { + + // Have a truncate fed by a bitcast of an f128. + return DCI.DAG.getNode(PPCISD::EXTRACT_FP128, dl, MVT::i64, + Op0.getNode()->getOperand(0), + DCI.DAG.getTargetConstant(0, dl, TCVT)); + } + + // BITCAST feeding SRL feeding TRUNCATE. The shift amount must be a + // constant: getConstantOperandVal() asserts on a non-constant operand, + // so check for a ConstantSDNode before reading it. + if (Op0.getNode()->getOpcode() == ISD::SRL && + Op0.getNode()->getOperand(0).getValueType() == MVT::i128 && + isa<ConstantSDNode>(Op0.getNode()->getOperand(1)) && + Op0.getNode()->getConstantOperandVal(1) == 64 && + Op0.getNode()->getOperand(0).getNode()->getOpcode() == ISD::BITCAST) { + SDNode* BitcastNode = Op0.getNode()->getOperand(0).getNode(); + + // Check if we are bitcasting from an f128 + if (BitcastNode->getOperand(0).getValueType() == MVT::f128) { + return DCI.DAG.getNode(PPCISD::EXTRACT_FP128, dl, MVT::i64, + BitcastNode->getOperand(0), + DCI.DAG.getTargetConstant(64, dl, TCVT)); + } + } + + } + + return SDValue(); +} + bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { // Only duplicate to increase tail-calls for the 64bit SysV ABIs. if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64()) Index: lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.td +++ lib/Target/PowerPC/PPCInstrInfo.td @@ -231,6 +231,12 @@ SDTCisSameAs<1,2>]>, []>; +// Extract bits from a float128 +def PPCextract_fp128: SDNode<"PPCISD::EXTRACT_FP128", + SDTypeProfile<1, 2, + [SDTCisInt<0>, SDTCisFP<1>, SDTCisPtrTy<2>]>, + []>; + // These are target-independent nodes, but have target-specific formats. 
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_PPCCallSeqStart, [SDNPHasChain, SDNPOutGlue]>; Index: lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -1534,6 +1534,10 @@ def MFVSRLD: XX1_RS6_RD5_XO<31, 307, (outs g8rc:$rA), (ins vsrc:$XT), "mfvsrld $rA, $XT", IIC_VecGeneral, []>, Requires<[In64BitMode]>; + let isCodeGenOnly = 1 in + def MFVRLD: XX1_RS6_RD5_XO<31, 307, (outs g8rc:$rA), (ins vrrc:$XT), + "mfvsrld $rA, $XT", IIC_VecGeneral, + []>, Requires<[In64BitMode]>; } // IsISA3_0, HasDirectMove } // UseVSXReg = 1 @@ -3533,6 +3537,17 @@ } } +// The constant operand is the bit offset of the extracted doubleword in the +// i128 view of the f128. Offset 0 is the least significant doubleword, held in +// VSR doubleword 1 (mfvsrld); offset 64 is the most significant doubleword, +// held in VSR doubleword 0 (mfvsrd). +let Predicates = [IsISA3_0, HasP9Vector, HasDirectMove, IsLittleEndian] in { + def : Pat<(i64 (PPCextract_fp128 f128:$rA, 0)), + (i64 (MFVRLD $rA))>; + def : Pat<(i64 (PPCextract_fp128 f128:$rA, 64)), + (i64 (MFVRD $rA))>; +} + let Predicates = [HasP9Vector] in { let isPseudo = 1 in { let mayStore = 1 in { Index: test/CodeGen/PowerPC/f128-bitcast.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/f128-bitcast.ll @@ -0,0 +1,27 @@ +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -enable-ppc-quad-precision -verify-machineinstrs \ +; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s + +; Function Attrs: norecurse nounwind readnone +define i64 @getPart1(fp128 %in) local_unnamed_addr { +entry: + %0 = bitcast fp128 %in to i128 + %a.sroa.0.0.extract.trunc = trunc i128 %0 to i64 + ret i64 %a.sroa.0.0.extract.trunc +; CHECK-LABEL: getPart1 +; CHECK: mfvsrld r3, v2 +; CHECK-NEXT: blr +} + +; Function Attrs: norecurse nounwind readnone +define i64 @getPart2(fp128 %in) local_unnamed_addr { +entry: + %0 = bitcast fp128 %in to i128 + %a.sroa.0.8.extract.shift = lshr i128 %0, 64 + %a.sroa.0.8.extract.trunc = trunc i128 %a.sroa.0.8.extract.shift to i64 + ret i64 %a.sroa.0.8.extract.trunc +; CHECK-LABEL: 
getPart2 +; CHECK: mfvsrd r3, v2 +; CHECK-NEXT: blr +} +