Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7310,6 +7310,16 @@
 
   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
+  //
+  // For ppc_fp128:
+  // fold (bitcast (fneg x)) ->
+  //     flipbit = signbit
+  //     (xor (bitcast x) (build_pair flipbit, flipbit))
+  // fold (bitcast (fabs x)) ->
+  //     flipbit = (and (extract_element (bitcast x), hi), signbit)
+  //     (xor (bitcast x) (build_pair flipbit, flipbit))
+  // where "hi" selects the i64 half holding the high double: element 0 on
+  // little-endian targets, element 1 on big-endian targets.
   // This often reduces constant pool loads.
   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
@@ -7320,6 +7330,32 @@
     AddToWorklist(NewConv.getNode());
 
     SDLoc DL(N);
+    if (N0.getValueType() == MVT::ppcf128) {
+      assert(VT.getSizeInBits() == 128);
+      SDValue SignBit = DAG.getConstant(
+          APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
+      SDValue FlipBit;
+      if (N0.getOpcode() == ISD::FNEG) {
+        FlipBit = SignBit;
+        AddToWorklist(FlipBit.getNode());
+      } else {
+        assert(N0.getOpcode() == ISD::FABS);
+        // The high double carries the sign of the whole ppc_fp128. After the
+        // bitcast it is element 0 (the low i64 half) on little-endian targets
+        // and element 1 (the high half) on big-endian targets.
+        unsigned HiIdx = DAG.getDataLayout().isBigEndian() ? 1 : 0;
+        SDValue Hi =
+            DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
+                        DAG.getIntPtrConstant(HiIdx, SDLoc(NewConv)));
+        AddToWorklist(Hi.getNode());
+        FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
+        AddToWorklist(FlipBit.getNode());
+      }
+      SDValue FlipBits =
+          DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
+      AddToWorklist(FlipBits.getNode());
+      return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
+    }
     APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
     if (N0.getOpcode() == ISD::FNEG)
       return DAG.getNode(ISD::XOR, DL, VT,
@@ -7333,6 +7369,15 @@
   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
   // Note that we don't handle (copysign x, cst) because this can always be
   // folded to an fneg or fabs.
+  //
+  // For ppc_fp128:
+  // fold (bitcast (fcopysign cst, x)) ->
+  //     flipbit = (and (extract_element
+  //                     (xor (bitcast cst), (bitcast x)), hi),
+  //                    signbit)
+  //     (xor (bitcast cst) (build_pair flipbit, flipbit))
+  // where "hi" is the endian-dependent index of the half holding the high
+  // double (0 on little-endian targets, 1 on big-endian targets).
   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
       VT.isInteger() && !VT.isVector()) {
@@ -7361,6 +7406,32 @@
       AddToWorklist(X.getNode());
     }
 
+    if (N0.getValueType() == MVT::ppcf128) {
+      APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2);
+      SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(0)), VT,
+                                N0.getOperand(0));
+      AddToWorklist(Cst.getNode());
+      SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(1)), VT,
+                              N0.getOperand(1));
+      AddToWorklist(X.getNode());
+      SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
+      AddToWorklist(XorResult.getNode());
+      // Pick the i64 half holding the high double's sign bit: element 0 on
+      // little-endian targets, element 1 on big-endian targets.
+      unsigned HiIdx = DAG.getDataLayout().isBigEndian() ? 1 : 0;
+      SDValue XorResult64 = DAG.getNode(
+          ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
+          DAG.getIntPtrConstant(HiIdx, SDLoc(XorResult)));
+      AddToWorklist(XorResult64.getNode());
+      SDValue FlipBit =
+          DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
+                      DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
+      AddToWorklist(FlipBit.getNode());
+      SDValue FlipBits =
+          DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
+      AddToWorklist(FlipBits.getNode());
+      return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
+    }
     APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
     X = DAG.getNode(ISD::AND, SDLoc(X), VT,
                     X, DAG.getConstant(SignBit, SDLoc(X), VT));
Index: test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
===================================================================
--- /dev/null
+++ test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll
@@ -0,0 +1,66 @@
+; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+
+define i128 @test_abs(ppc_fp128 %x) nounwind {
+entry:
+; CHECK-LABEL: test_abs:
+; CHECK-DAG: stxsdx 2, 0, [[ADDR_HI:[0-9]+]]
+; CHECK-DAG: stxsdx 1, 0, [[ADDR_LO:[0-9]+]]
+; CHECK-DAG: addi [[ADDR_HI]], [[SP:[0-9]+]], [[OFFSET_HI:-?[0-9]+]]
+; CHECK-DAG: addi [[ADDR_LO]], [[SP]], [[OFFSET_LO:-?[0-9]+]]
+; CHECK-DAG: li [[MASK_REG:[0-9]+]], 1
+; CHECK: sldi [[MASK_REG]], [[MASK_REG]], 63
+; CHECK-DAG: ld [[HI:[0-9]+]], [[OFFSET_LO]]([[SP]])
+; CHECK-DAG: ld [[LO:[0-9]+]], [[OFFSET_HI]]([[SP]])
+; CHECK: and [[FLIP_BIT:[0-9]+]], [[HI]], [[MASK_REG]]
+; CHECK-DAG: xor 3, [[HI]], [[FLIP_BIT]]
+; CHECK-DAG: xor 4, [[LO]], [[FLIP_BIT]]
+; CHECK: blr
+  %0 = tail call ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128 %x)
+  %1 = bitcast ppc_fp128 %0 to i128
+  ret i128 %1
+}
+
+define i128 @test_neg(ppc_fp128 %x) nounwind {
+entry:
+; CHECK-LABEL: test_neg:
+; CHECK-DAG: stxsdx 2, 0, [[ADDR_HI:[0-9]+]]
+; CHECK-DAG: stxsdx 1, 0, [[ADDR_LO:[0-9]+]]
+; CHECK-DAG: addi [[ADDR_HI]], [[SP:[0-9]+]], [[OFFSET_HI:-?[0-9]+]]
+; CHECK-DAG: addi [[ADDR_LO]], [[SP]], [[OFFSET_LO:-?[0-9]+]]
+; CHECK-DAG: li [[FLIP_BIT:[0-9]+]], 1
+; CHECK-DAG: sldi [[FLIP_BIT]], [[FLIP_BIT]], 63
+; CHECK-DAG: ld [[HI:[0-9]+]], [[OFFSET_LO]]([[SP]])
+; CHECK-DAG: ld [[LO:[0-9]+]], [[OFFSET_HI]]([[SP]])
+; CHECK-NOT: BARRIER
+; CHECK-DAG: xor 3, [[HI]], [[FLIP_BIT]]
+; CHECK-DAG: xor 4, [[LO]], [[FLIP_BIT]]
+; CHECK: blr
+  %0 = fsub ppc_fp128 0xM80000000000000000000000000000000, %x
+  %1 = bitcast ppc_fp128 %0 to i128
+  ret i128 %1
+}
+
+define i128 @test_copysign(ppc_fp128 %x) nounwind {
+entry:
+; CHECK-LABEL: test_copysign:
+; CHECK-DAG: stxsdx 1, 0, [[ADDR_REG:[0-9]+]]
+; CHECK-DAG: addi [[ADDR_REG]], 1, [[OFFSET:-?[0-9]+]]
+; CHECK-DAG: li [[SIGN:[0-9]+]], 1
+; CHECK-DAG: sldi [[SIGN]], [[SIGN]], 63
+; CHECK-DAG: li [[HI_TMP:[0-9]+]], 16399
+; CHECK-DAG: sldi [[CST_HI:[0-9]+]], [[HI_TMP]], 48
+; CHECK-DAG: li [[LO_TMP:[0-9]+]], 3019
+; CHECK-DAG: sldi [[CST_LO:[0-9]+]], [[LO_TMP]], 52
+; CHECK-NOT: BARRIER
+; CHECK-DAG: ld [[X_HI:[0-9]+]], [[OFFSET]](1)
+; CHECK-DAG: and [[NEW_HI_TMP:[0-9]+]], [[X_HI]], [[SIGN]]
+; CHECK-DAG: or 3, [[NEW_HI_TMP]], [[CST_HI]]
+; CHECK-DAG: xor 4, [[SIGN]], [[CST_LO]]
+; CHECK: blr
+  %0 = tail call ppc_fp128 @llvm.copysign.ppcf128(ppc_fp128 0xMBCB0000000000000400F000000000000, ppc_fp128 %x)
+  %1 = bitcast ppc_fp128 %0 to i128
+  ret i128 %1
+}
+
+declare ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128)
+declare ppc_fp128 @llvm.copysign.ppcf128(ppc_fp128, ppc_fp128)