diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1406,6 +1406,7 @@
     SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineSUB(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineFMALike(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1360,6 +1360,9 @@
   // We have target-specific dag combine patterns for the following nodes:
   setTargetDAGCombine({ISD::ADD, ISD::SHL, ISD::SRA, ISD::SRL, ISD::MUL,
                        ISD::FMA, ISD::SINT_TO_FP, ISD::BUILD_VECTOR});
+  if (Subtarget.isISA3_0() && Subtarget.isPPC64()) {
+    setTargetDAGCombine({ISD::SUB});
+  }
   if (Subtarget.hasFPCVT())
     setTargetDAGCombine(ISD::UINT_TO_FP);
   setTargetDAGCombine({ISD::LOAD, ISD::STORE, ISD::BR_CC});
@@ -15106,6 +15109,8 @@
   default: break;
   case ISD::ADD:
     return combineADD(N, DCI);
+  case ISD::SUB:
+    return combineSUB(N, DCI);
   case ISD::SHL:
     return combineSHL(N, DCI);
   case ISD::SRA:
@@ -17125,6 +17130,103 @@
   return MatPCRel;
 }
 
+// Look for i128 multiply-add opportunities:
+//   (add (mul (sext A) (sext B)) (sext C)) =>
+//       (build_pair (maddld A,B,C), (maddhd A,B,C))
+//   (add (mul (zext A) (zext B)) (zext C)) =>
+//       (build_pair (maddld A,B,C), (maddhdu A,B,C))
+//   (sub (mul (sext A) (sext B)) (sext C)) =>
+//       (build_pair (maddld A,B,-C), (maddhd A,B,-C))
+//   (sub (sext C) (mul (sext A) (sext B))) =>
+//       (build_pair (maddld -A,B,C), (maddhd -A,B,C))
+//
+// A, B and C must be at most 64 bits wide before extension; otherwise
+// truncating the extended values to i64 would drop significant bits.
+//
+// The SUB forms negate one operand in i64, and 0 - INT64_MIN wraps back to
+// INT64_MIN, which would corrupt the high doubleword. They are therefore only
+// formed when the negated operand provably fits in 63 signed bits.
+static SDValue combineADD_SUBToMADD(SDNode *N, SelectionDAG &DAG,
+                                    const PPCSubtarget &Subtarget) {
+  if (!Subtarget.isPPC64() || !Subtarget.isISA3_0())
+    return SDValue();
+
+  const bool IsSub = N->getOpcode() == ISD::SUB;
+  if (N->getOpcode() != ISD::ADD && !IsSub)
+    return SDValue();
+
+  if (N->getValueType(0) != MVT::i128)
+    return SDValue();
+
+  const bool MulIsLHS = N->getOperand(0).getOpcode() == ISD::MUL;
+  if (!MulIsLHS && N->getOperand(1).getOpcode() != ISD::MUL)
+    return SDValue();
+
+  SDValue MulOp = N->getOperand(MulIsLHS ? 0 : 1);
+  // Do not transform if there are other users of the mul.
+  if (!MulOp.hasOneUse())
+    return SDValue();
+
+  SDValue MulLHS = MulOp.getOperand(0);
+  SDValue MulRHS = MulOp.getOperand(1);
+  SDValue AddSubOp = N->getOperand(MulIsLHS ? 1 : 0);
+
+  auto AllHaveOpcode = [&](unsigned Opc) {
+    return MulLHS.getOpcode() == Opc && MulRHS.getOpcode() == Opc &&
+           AddSubOp.getOpcode() == Opc;
+  };
+  const bool IsSigned = AllHaveOpcode(ISD::SIGN_EXTEND);
+  const bool IsUnsigned = AllHaveOpcode(ISD::ZERO_EXTEND);
+  if (!IsSigned && !IsUnsigned)
+    return SDValue();
+
+  // SUB unsigned is not worth the effort.
+  if (IsSub && IsUnsigned)
+    return SDValue();
+
+  // The multiply-add instructions take 64-bit operands, so every value must
+  // have been extended from a type that fits in 64 bits.
+  auto IsExtFromWiderThan64 = [](SDValue Ext) {
+    return Ext.getOperand(0).getScalarValueSizeInBits() > 64;
+  };
+  if (IsExtFromWiderThan64(MulLHS) || IsExtFromWiderThan64(MulRHS) ||
+      IsExtFromWiderThan64(AddSubOp))
+    return SDValue();
+
+  if (IsSub) {
+    // An i128 value with at least 66 sign bits lies in [-2^62, 2^62 - 1], so
+    // its negation is representable in i64.
+    auto IsSafeToNegate = [&](SDValue V) {
+      return DAG.ComputeNumSignBits(V) >= 66;
+    };
+    // For C - A * B either factor may carry the minus sign; put one that can
+    // be negated safely, if any, on the left.
+    if (!MulIsLHS && !IsSafeToNegate(MulLHS))
+      std::swap(MulLHS, MulRHS);
+    if (!IsSafeToNegate(MulIsLHS ? AddSubOp : MulLHS))
+      return SDValue();
+  }
+
+  SDLoc dl(N);
+  SDValue Op0 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, MulLHS);
+  SDValue Op1 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, MulRHS);
+  SDValue Op2 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, AddSubOp);
+
+  // Move the minus sign for SUB.
+  if (IsSub) {
+    SDValue &Negated = MulIsLHS ? Op2 : Op0;
+    Negated = DAG.getNode(ISD::SUB, dl, MVT::i64,
+                          DAG.getConstant(0, dl, MVT::i64), Negated);
+  }
+
+  SDValue MAddL =
+      BuildIntrinsicOp(Intrinsic::ppc_maddld, Op0, Op1, Op2, DAG, dl);
+  SDValue MAddH = BuildIntrinsicOp(IsUnsigned ? Intrinsic::ppc_maddhdu
+                                              : Intrinsic::ppc_maddhd,
+                                   Op0, Op1, Op2, DAG, dl);
+  return DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i128, MAddL, MAddH);
+}
+
 SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
   if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
     return Value;
@@ -17132,6 +17234,16 @@
   if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
     return Value;
 
+  if (auto Value = combineADD_SUBToMADD(N, DCI.DAG, Subtarget))
+    return Value;
+
+  return SDValue();
+}
+
+SDValue PPCTargetLowering::combineSUB(SDNode *N, DAGCombinerInfo &DCI) const {
+  if (auto Value = combineADD_SUBToMADD(N, DCI.DAG, Subtarget))
+    return Value;
+
   return SDValue();
 }
 
diff --git a/llvm/test/CodeGen/PowerPC/add-sub-int128-madd.ll b/llvm/test/CodeGen/PowerPC/add-sub-int128-madd.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/add-sub-int128-madd.ll
@@ -0,0 +1,93 @@
+; NOTE: The add tests carry assertions generated by
+; utils/update_llc_test_checks.py. The sub tests use hand-written checks.
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P9
+
+define i128 @add_int128(i64 noundef %a, i64 noundef %b, i64 noundef %c) local_unnamed_addr #0 {
+; CHECK-P9-LABEL: add_int128:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    maddld 6, 4, 3, 5
+; CHECK-P9-NEXT:    maddhd 4, 4, 3, 5
+; CHECK-P9-NEXT:    mr 3, 6
+; CHECK-P9-NEXT:    blr
+entry:
+  %conv = sext i64 %a to i128
+  %conv1 = sext i64 %b to i128
+  %mul = mul nsw i128 %conv1, %conv
+  %conv2 = sext i64 %c to i128
+  %add = add nsw i128 %mul, %conv2
+  ret i128 %add
+}
+
+define i128 @add_unsigned_int128(i64 noundef %a, i64 noundef %b, i64 noundef %c) local_unnamed_addr #0 {
+; CHECK-P9-LABEL: add_unsigned_int128:
+; CHECK-P9:       # %bb.0: # %entry
+; CHECK-P9-NEXT:    maddld 6, 4, 3, 5
+; CHECK-P9-NEXT:    maddhdu 4, 4, 3, 5
+; CHECK-P9-NEXT:    mr 3, 6
+; CHECK-P9-NEXT:    blr
+entry:
+  %conv = zext i64 %a to i128
+  %conv1 = zext i64 %b to i128
+  %mul = mul nuw i128 %conv1, %conv
+  %conv2 = zext i64 %c to i128
+  %add = add nuw i128 %mul, %conv2
+  ret i128 %add
+}
+
+; A * B - C: C is extended from i32, so -C cannot wrap in i64.
+define i128 @sub_int128_AxBmC(i64 noundef %a, i64 noundef %b, i32 noundef signext %c) local_unnamed_addr #0 {
+; CHECK-P9-LABEL: sub_int128_AxBmC:
+; CHECK-P9-DAG:   maddld
+; CHECK-P9-DAG:   maddhd
+; CHECK-P9:       blr
+entry:
+  %conv = sext i64 %a to i128
+  %conv1 = sext i64 %b to i128
+  %mul = mul nsw i128 %conv1, %conv
+  %conv2 = sext i32 %c to i128
+  %sub = sub nsw i128 %mul, %conv2
+  ret i128 %sub
+}
+
+; C - A * B: A is extended from i32, so -A cannot wrap in i64.
+define i128 @sub_int128_CmAxB(i32 noundef signext %a, i64 noundef %b, i64 noundef %c) local_unnamed_addr #0 {
+; CHECK-P9-LABEL: sub_int128_CmAxB:
+; CHECK-P9-DAG:   maddld
+; CHECK-P9-DAG:   maddhd
+; CHECK-P9:       blr
+entry:
+  %conv = sext i64 %c to i128
+  %conv1 = sext i32 %a to i128
+  %conv2 = sext i64 %b to i128
+  %mul = mul nsw i128 %conv2, %conv1
+  %sub = sub nsw i128 %conv, %mul
+  ret i128 %sub
+}
+
+; A * B - C with a 64-bit C: -C wraps for INT64_MIN, so no multiply-add.
+define i128 @sub_int128_AxBmC_i64(i64 noundef %a, i64 noundef %b, i64 noundef %c) local_unnamed_addr #0 {
+; CHECK-P9-LABEL: sub_int128_AxBmC_i64:
+; CHECK-P9-NOT:   maddhd
+; CHECK-P9:       blr
+entry:
+  %conv = sext i64 %a to i128
+  %conv1 = sext i64 %b to i128
+  %mul = mul nsw i128 %conv1, %conv
+  %conv2 = sext i64 %c to i128
+  %sub = sub nsw i128 %mul, %conv2
+  ret i128 %sub
+}
+
+; C - A * B with 64-bit factors: negating either wraps for INT64_MIN.
+define i128 @sub_int128_CmAxB_i64(i64 noundef %a, i64 noundef %b, i64 noundef %c) local_unnamed_addr #0 {
+; CHECK-P9-LABEL: sub_int128_CmAxB_i64:
+; CHECK-P9-NOT:   maddhd
+; CHECK-P9:       blr
+entry:
+  %conv = sext i64 %c to i128
+  %conv1 = sext i64 %a to i128
+  %conv2 = sext i64 %b to i128
+  %mul = mul nsw i128 %conv2, %conv1
+  %sub = sub nsw i128 %conv, %mul
+  ret i128 %sub
+}