Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h @@ -1093,6 +1093,7 @@ SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const; SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const; + SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const; /// ConvertSETCCToSubtract - looks at SETCC that compares ints. It replaces /// SETCC with integer subtraction when (1) there is a legal way of doing it Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp +++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1070,6 +1070,8 @@ setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); + setTargetDAGCombine(ISD::TRUNCATE); + if (Subtarget.useCRBits()) { setTargetDAGCombine(ISD::TRUNCATE); setTargetDAGCombine(ISD::SETCC); @@ -9634,6 +9636,9 @@ return; Results.push_back(LowerFP_TO_INT(SDValue(N, 0), DAG, dl)); return; + case ISD::BITCAST: + // Don't handle bitcast here. + return; } } @@ -12479,6 +12484,7 @@ case ISD::ANY_EXTEND: return DAGCombineExtBoolTrunc(N, DCI); case ISD::TRUNCATE: + return combineTRUNCATE(N, DCI); case ISD::SETCC: case ISD::SELECT_CC: return DAGCombineTruncBoolExt(N, DCI); @@ -14253,6 +14259,58 @@ return SDValue(); } +// Detect TRUNCATE operations on bitcasts of float128 values. +// What we are looking for here is the situation where we extract a subset +// of bits from a 128 bit float. 
+// This can be of two forms: +// 1) BITCAST of f128 feeding TRUNCATE +// 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE +// The reason this is required is because we do not have a legal i128 type +// and so we want to prevent having to store the f128 and then reload part +// of it. +SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N, + DAGCombinerInfo &DCI) const { + // If we are using CRBits then try that first. + if (Subtarget.useCRBits()) { + // Check if CRBits did anything and return that if it did. + if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI)) + return CRTruncValue; + } + + SDLoc dl(N); + SDValue Op0 = N->getOperand(0); + + // Looking for a truncate of i128 to i64. + if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64) + return SDValue(); + + int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0; + + // SRL feeding TRUNCATE. + if (Op0.getOpcode() == ISD::SRL) { + ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1)); + // The right shift has to be by 64 bits. + if (!ConstNode || ConstNode->getZExtValue() != 64) + return SDValue(); + + // Switch the element number to extract. + EltToExtract = EltToExtract ? 0 : 1; + // Update Op0 past the SRL. + Op0 = Op0.getOperand(0); + } + + // BITCAST feeding a TRUNCATE possibly via SRL. + if (Op0.getOpcode() == ISD::BITCAST && + Op0.getValueType() == MVT::i128 && + Op0.getOperand(0).getValueType() == MVT::f128) { + SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0)); + return DCI.DAG.getNode( + ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Bitcast, + DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32)); + } + return SDValue(); +} + bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { // Only duplicate to increase tail-calls for the 64bit SysV ABIs. 
if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64()) Index: llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td =================================================================== --- llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td +++ llvm/trunk/lib/Target/PowerPC/PPCInstrVSX.td @@ -1040,6 +1040,15 @@ def : Pat<(v1i128 (bitconvert v2f64:$A)), (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v2i64 (bitconvert f128:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v4i32 (bitconvert f128:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v8i16 (bitconvert f128:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; +def : Pat<(v16i8 (bitconvert f128:$A)), + (COPY_TO_REGCLASS $A, VRRC)>; + def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 0)), (v2f64 (XVCVSXWDP (v2i64 (XXMRGHW $C, $C))))>; def : Pat<(v2f64 (PPCsvec2fp v4i32:$C, 1)), Index: llvm/trunk/test/CodeGen/PowerPC/f128-bitcast.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/f128-bitcast.ll +++ llvm/trunk/test/CodeGen/PowerPC/f128-bitcast.ll @@ -0,0 +1,53 @@ +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown \ +; RUN: -enable-ppc-quad-precision -verify-machineinstrs \ +; RUN: -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s +; RUN: llc -mcpu=pwr9 -mtriple=powerpc64-unknown-unknown \ +; RUN: -enable-ppc-quad-precision -verify-machineinstrs \ +; RUN: -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE + +; Function Attrs: norecurse nounwind readnone +define i64 @getPart1(fp128 %in) local_unnamed_addr { +entry: + %0 = bitcast fp128 %in to i128 + %a.sroa.0.0.extract.trunc = trunc i128 %0 to i64 + ret i64 %a.sroa.0.0.extract.trunc +; CHECK-LABEL: getPart1 +; CHECK: mfvsrld r3, v2 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: getPart1 +; CHECK-BE: mfvsrld r3, v2 +; CHECK-BE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readnone +define i64 @getPart2(fp128 %in) local_unnamed_addr { +entry: + %0 = bitcast fp128 %in to i128 + 
%a.sroa.0.8.extract.shift = lshr i128 %0, 64 + %a.sroa.0.8.extract.trunc = trunc i128 %a.sroa.0.8.extract.shift to i64 + ret i64 %a.sroa.0.8.extract.trunc +; CHECK-LABEL: getPart2 +; CHECK: mfvsrd r3, v2 +; CHECK-NEXT: blr +; CHECK-BE-LABEL: getPart2 +; CHECK-BE: mfvsrd r3, v2 +; CHECK-BE-NEXT: blr +} + +; Function Attrs: norecurse nounwind readnone +define i64 @checkBitcast(fp128 %in, <2 x i64> %in2, <2 x i64> *%out) local_unnamed_addr { +entry: + %0 = bitcast fp128 %in to <2 x i64> + %1 = extractelement <2 x i64> %0, i64 0 + %2 = add <2 x i64> %0, %in2 + store <2 x i64> %2, <2 x i64> *%out, align 16 + ret i64 %1 +; CHECK-LABEL: checkBitcast +; CHECK: mfvsrld r3, v2 +; CHECK: blr +; CHECK-BE-LABEL: checkBitcast +; CHECK-BE: mfvsrd r3, v2 +; CHECK-BE: blr +} +