Index: lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- lib/Target/PowerPC/PPCISelLowering.h +++ lib/Target/PowerPC/PPCISelLowering.h @@ -1093,6 +1093,7 @@ SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const; /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces /// SETCC with integer subtraction when (1) there is a legal way of doing it Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -1077,6 +1077,8 @@ setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); + setTargetDAGCombine(ISD::TRUNCATE); + if (Subtarget.useCRBits()) { setTargetDAGCombine(ISD::TRUNCATE); setTargetDAGCombine(ISD::SETCC); @@ -9666,6 +9668,9 @@ return; Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); return; + case ISD::BITCAST: + // Don't handle bitcast here. + return; } } @@ -12511,6 +12516,7 @@ case ISD::ANY_EXTEND: return DAGCombineExtBoolTrunc(N, DCI); case ISD::TRUNCATE: + return combineTRUNCATE(N, DCI); case ISD::SETCC: case ISD::SELECT_CC: return DAGCombineTruncBoolExt(N, DCI); @@ -14285,6 +14291,64 @@ return SDValue(); } +// Detect TRUNCATE operations on bitcasts of float128 values. +// What we are looking for here is the situation where we extract a subset +// of bits from a 128 bit float. +// This can be of two forms: +// 1) BITCAST of f128 feeding TRUNCATE +// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE +// The reason this is required is because we do not have a legal i128 type +// and so we want to prevent having to store the f128 and then reload part +// of it. 
+SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
+                                           DAGCombinerInfo &DCI) const {
+  // If we are using CRBits then try that first.
+  if (Subtarget.useCRBits()) {
+    // Check if CRBits did anything and return that if it did.
+    if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
+      return CRTruncValue;
+  }
+
+  SDLoc dl(N);
+  SDValue Op0 = N->getOperand(0);
+
+  // Looking for a truncate of i128 to i64.
+  if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
+    return SDValue();
+
+  // A plain truncate keeps the low 64 bits of the i128. Viewed as v2i64 that
+  // doubleword is element 1 on big endian targets and element 0 on little
+  // endian targets.
+  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
+
+  // SRL feeding the TRUNCATE: only a constant shift by exactly 64 selects a
+  // whole doubleword (the high one), so anything else is left alone.
+  if (Op0.getOpcode() == ISD::SRL) {
+    ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
+    if (!ConstNode || ConstNode->getZExtValue() != 64)
+      return SDValue();
+
+    // The shift discards the low doubleword, so extract the other element
+    // and continue matching on the value being shifted.
+    EltToExtract = EltToExtract ? 0 : 1;
+    Op0 = Op0.getOperand(0);
+  }
+
+  // Only a BITCAST from f128 to i128 is handled, either feeding the TRUNCATE
+  // directly or through the SRL above.
+  if (Op0.getOpcode() != ISD::BITCAST || Op0.getValueType() != MVT::i128 ||
+      Op0.getOperand(0).getValueType() != MVT::f128)
+    return SDValue();
+
+  // Reinterpret the f128 source itself (not the intermediate i128 bitcast) as
+  // v2i64 and pull out the requested doubleword, so the result never depends
+  // on an i128 node.
+  SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
+  return DCI.DAG.getNode(
+      ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast,
+      DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
+}
+
 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
   // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64()) Index: lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- lib/Target/PowerPC/PPCInstrVSX.td +++ lib/Target/PowerPC/PPCInstrVSX.td @@ -1040,6 +1040,16 @@ def : Pat<(v1i128 (bitconvert v2f64:$A)), (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v2i64 (bitconvert f128:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v4i32 (bitconvert f128:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v8i16 (bitconvert f128:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v16i8 (bitconvert f128:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; + + // sign extension patterns // To extend "in place" from v2i32 to v2i64, we have input data like: // | undef | i32 | undef | i32 | Index: test/CodeGen/PowerPC/f128-bitcast.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/f128-bitcast.ll @@ -0,0 +1,53 @@ +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -enable-ppc-quad-precision -verify-machineinstrs \ +; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-unknown \ +; RUN: -enable-ppc-quad-precision -verify-machineinstrs \ +; RUN: -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE + +; getPart1: trunc of the fp128 to i128 bitcast; expect mfvsrld immediately followed by blr (LE and BE) +define i64 @getPart1(fp128 %in) local_unnamed_addr { +entry: + %0 = bitcast fp128 %in to i128 + %a.sroa.0.0.extract.trunc = trunc i128 %0 to i64 + ret i64 %a.sroa.0.0.extract.trunc +; CHECK-LABEL: getPart1 +; CHECK: mfvsrld r3, v2 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: getPart1 +; CHECK-BE: mfvsrld r3, v2 +; CHECK-BE-NEXT: blr +} + +; getPart2: lshr by 64 then trunc of the bitcast; expect mfvsrd immediately followed by blr (LE and BE) +define i64 @getPart2(fp128 %in) local_unnamed_addr { +entry: + %0 = bitcast fp128 %in to i128 + %a.sroa.0.8.extract.shift = lshr i128 %0, 64 + %a.sroa.0.8.extract.trunc = trunc i128 %a.sroa.0.8.extract.shift 
to i64 + ret i64 %a.sroa.0.8.extract.trunc +; CHECK-LABEL: getPart2 +; CHECK: mfvsrd r3, v2 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: getPart2 +; CHECK-BE: mfvsrd r3, v2 +; CHECK-BE-NEXT: blr +} + +; checkBitcast: element 0 of the fp128 to <2 x i64> bitcast; expect mfvsrld on LE and mfvsrd on BE +define i64 @checkBitcast(fp128 %in, <2 x i64> %in2, <2 x i64> *%out) local_unnamed_addr { +entry: + %0 = bitcast fp128 %in to <2 x i64> + %1 = extractelement <2 x i64> %0, i64 0 + %2 = add <2 x i64> %0, %in2 + store <2 x i64> %2, <2 x i64> *%out, align 16 + ret i64 %1 +; CHECK-LABEL: checkBitcast +; CHECK: mfvsrld r3, v2 +; CHECK: blr +; CHECK-BE-LABEL: checkBitcast +; CHECK-BE: mfvsrd r3, v2 +; CHECK-BE: blr +} +