Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
@@ -1121,6 +1121,7 @@
     SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
+    SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
     SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
Index: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1071,6 +1071,7 @@
   setTargetDAGCombine(ISD::SHL);
   setTargetDAGCombine(ISD::SRA);
   setTargetDAGCombine(ISD::SRL);
+  setTargetDAGCombine(ISD::MUL);
   setTargetDAGCombine(ISD::SINT_TO_FP);
   setTargetDAGCombine(ISD::BUILD_VECTOR);
   if (Subtarget.hasFPCVT())
@@ -12643,6 +12644,8 @@
     return combineSRA(N, DCI);
   case ISD::SRL:
     return combineSRL(N, DCI);
+  case ISD::MUL:
+    return combineMUL(N, DCI);
   case PPCISD::SHL:
     if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
       return N->getOperand(0);
@@ -14565,6 +14568,82 @@
   return SDValue();
 }
 
+/// Rewrite a multiply by a constant (scalar or splat) of the form
+/// +/-(2^N + 1) or +/-(2^N - 1) as a shift combined with add/sub, when the
+/// cost table below says the sequence beats the multiply on the target CPU.
+/// Returns an empty SDValue when the node is left as it is.
+SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+
+  ConstantSDNode *MulConst = isConstOrConstSplat(N->getOperand(1));
+  if (!MulConst)
+    return SDValue();
+
+  // When optimizing for minimum size, a legal multiply is usually smaller than
+  // the replacement sequence, so keep it.
+  EVT VT = N->getValueType(0);
+  if (DAG.getMachineFunction().getFunction().optForMinSize() &&
+      isOperationLegal(ISD::MUL, VT))
+    return SDValue();
+
+  // Cycle ratio of the operations involved:
+  //
+  //   CPU   type     mul  add  shl
+  //   pwr8  scalar    4    1    1
+  //   pwr8  vector    7    2    2
+  //   pwr9  scalar    5    2    2
+  //   pwr9  vector    7    2    2
+  //
+  // On pwr8 every replacement is accepted. On pwr9 the two-instruction forms
+  // (add/sub + shl) cost 4 and are always accepted, but the three-instruction
+  // form (mul x, -(2^N + 1)) => -(add (shl x, N), x) costs 6 (sub + add +
+  // shl), so it is only accepted for vector types.
+  // TODO: enhance the condition for subtarget before pwr8
+  auto ShouldExpand = [this](bool Negated, bool PlusOne, EVT Ty) -> bool {
+    const unsigned Directive = this->Subtarget.getDarwinDirective();
+    if (Directive == PPC::DIR_PWR8)
+      return true;
+    if (Directive == PPC::DIR_PWR9)
+      return !(Negated && PlusOne) || Ty.isVector();
+    return false;
+  };
+
+  const APInt &MulAmt = MulConst->getAPIntValue();
+  const bool Negated = MulAmt.isNegative();
+  const APInt Magnitude = MulAmt.abs();
+
+  // Classify |C|; the 2^N + 1 form wins when both match (e.g. |C| == 3).
+  const APInt BelowMag = Magnitude - 1;
+  const APInt AboveMag = Magnitude + 1;
+  const bool PlusOne = BelowMag.isPowerOf2();
+  if (!PlusOne && !AboveMag.isPowerOf2())
+    return SDValue();
+
+  if (!ShouldExpand(Negated, PlusOne, VT))
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue X = N->getOperand(0);
+  const unsigned ShiftAmt = (PlusOne ? BelowMag : AboveMag).logBase2();
+  SDValue Shifted =
+      DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShiftAmt, DL, VT));
+
+  if (PlusOne) {
+    // (mul x, 2^N + 1) => (add (shl x, N), x)
+    // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
+    SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, X, Shifted);
+    if (!Negated)
+      return Sum;
+    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Sum);
+  }
+
+  // (mul x, 2^N - 1) => (sub (shl x, N), x)
+  // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+  if (Negated)
+    return DAG.getNode(ISD::SUB, DL, VT, X, Shifted);
+  return DAG.getNode(ISD::SUB, DL, VT, Shifted, X);
+}
+
 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
   // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
   if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
Index: llvm/trunk/test/CodeGen/PowerPC/mul-const-i64.ll
===================================================================
--- llvm/trunk/test/CodeGen/PowerPC/mul-const-i64.ll
+++ llvm/trunk/test/CodeGen/PowerPC/mul-const-i64.ll
@@ -0,0 +1,92 @@
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -mcpu=generic < %s -mtriple=ppc64-- | FileCheck %s -check-prefix=GENERIC-CHECK
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -mcpu=pwr8 < %s -mtriple=ppc64-- | FileCheck %s -check-prefixes=PWR8-CHECK,CHECK
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -mcpu=pwr9 < %s -mtriple=ppc64le-- | FileCheck %s -check-prefixes=PWR9-CHECK,CHECK
+
+
+define i64 @foo(i64 %a) {
+entry:
+  %mul = mul nsw i64 %a, 6
+  ret i64 %mul
+}
+
+; GENERIC-CHECK-LABEL: @foo
+; GENERIC-CHECK: mulli r3, r3, 6
+; GENERIC-CHECK: blr
+
+define i64 @test1(i64 %a) {
+  %tmp.1 = mul nsw i64 %a, 16 ; [#uses=1]
+  ret i64 %tmp.1
+}
+; CHECK-LABEL: test1:
+; CHECK-NOT: mul
+; CHECK: sldi r[[REG1:[0-9]+]], r3, 4
+
+
+define i64 @test2(i64 %a) {
+  %tmp.1 = mul nsw i64 %a, 17 ; [#uses=1]
+  ret i64 %tmp.1
+}
+; CHECK-LABEL: test2:
+; CHECK-NOT: mul
+; CHECK: sldi r[[REG1:[0-9]+]], r3, 4
+; CHECK-NEXT: add r[[REG2:[0-9]+]], r3, r[[REG1]]
+
+define i64 @test3(i64 %a) {
+  %tmp.1 = mul nsw i64 %a, 15 ; [#uses=1]
+  ret i64 %tmp.1
+}
+; CHECK-LABEL: test3:
+; CHECK-NOT: mul
+; CHECK: sldi r[[REG1:[0-9]+]], r3, 4
+; CHECK-NEXT: sub r[[REG2:[0-9]+]], r[[REG1]], r3
+
+; negtive constant
+
+define i64 @test4(i64 %a) {
+  %tmp.1 = mul nsw i64 %a, -16 ; [#uses=1]
+ 
ret i64 %tmp.1 +} +; CHECK-LABEL: test4: +; CHECK-NOT: mul +; CHECK: sldi r[[REG1:[0-9]+]], r3, 4 +; CHECK-NEXT: neg r[[REG2:[0-9]+]], r[[REG1]] + +define i64 @test5(i64 %a) { + %tmp.1 = mul nsw i64 %a, -17 ; [#uses=1] + ret i64 %tmp.1 +} +; CHECK-LABEL: test5: +; PWR9-CHECK: mulli r[[REG1:[0-9]+]], r3, -17 +; PWR8-CHECK-NOT: mul +; PWR8-CHECK: sldi r[[REG1:[0-9]+]], r3, 4 +; PWR8-CHECK-NEXT: add r[[REG2:[0-9]+]], r3, r[[REG1]] +; PWR8-CHECK-NEXT: neg r{{[0-9]+}}, r[[REG2]] + +define i64 @test6(i64 %a) { + %tmp.1 = mul nsw i64 %a, -15 ; [#uses=1] + ret i64 %tmp.1 +} +; CHECK-LABEL: test6: +; CHECK-NOT: mul +; CHECK: sldi r[[REG1:[0-9]+]], r3, 4 +; CHECK-NEXT: sub r[[REG2:[0-9]+]], r3, r[[REG1]] +; CHECK-NOT: neg + +; boundary case + +define i64 @test7(i64 %a) { + %tmp.1 = mul nsw i64 %a, -9223372036854775808 ; [#uses=1] + ret i64 %tmp.1 +} +; CHECK-LABEL: test7: +; CHECK-NOT: mul +; CHECK: sldi r[[REG1:[0-9]+]], r3, 63 + +define i64 @test8(i64 %a) { + %tmp.1 = mul nsw i64 %a, 9223372036854775807 ; [#uses=1] + ret i64 %tmp.1 +} +; CHECK-LABEL: test8: +; CHECK-NOT: mul +; CHECK: sldi r[[REG1:[0-9]+]], r3, 63 +; CHECK-NEXT: sub r[[REG2:[0-9]+]], r[[REG1]], r3 Index: llvm/trunk/test/CodeGen/PowerPC/mul-const-vector.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/mul-const-vector.ll +++ llvm/trunk/test/CodeGen/PowerPC/mul-const-vector.ll @@ -0,0 +1,382 @@ +; RUN: llc -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s -mtriple=ppc64le-- -mcpu=pwr8 | FileCheck %s --check-prefixes=CHECK,CHECK-P8 +; RUN: llc -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s -mtriple=ppc64le-- -mcpu=pwr9 | FileCheck %s --check-prefixes=CHECK,CHECK-P9 + +define <16 x i8> @test1_v16i8(<16 x i8> %a) { + %tmp.1 = mul nsw <16 x i8> %a, ; <<16 x i8>> [#uses=1] + ret <16 x i8> %tmp.1 +} +; CHECK-LABEL: test1_v16i8: +; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4 +; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4 +; 
CHECK-NOT: vmul +; CHECK-NEXT: vslb v[[REG2:[0-9]+]], v2, v[[REG1]] + +define <16 x i8> @test2_v16i8(<16 x i8> %a) { + %tmp.1 = mul nsw <16 x i8> %a, ; <<16 x i8>> [#uses=1] + ret <16 x i8> %tmp.1 +} +; CHECK-LABEL: test2_v16i8: +; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4 +; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4 +; CHECK-NOT: vmul +; CHECK-NEXT: vslb v[[REG2:[0-9]+]], v2, v[[REG1]] +; CHECK-NEXT: vaddubm v[[REG3:[0-9]+]], v2, v[[REG2]] + +define <16 x i8> @test3_v16i8(<16 x i8> %a) { + %tmp.1 = mul nsw <16 x i8> %a, ; <<16 x i8>> [#uses=1] + ret <16 x i8> %tmp.1 +} +; CHECK-LABEL: test3_v16i8: +; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4 +; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4 +; CHECK-NOT: vmul +; CHECK-NEXT: vslb v[[REG2:[0-9]+]], v2, v[[REG1]] +; CHECK-NEXT: vsububm v[[REG3:[0-9]+]], v[[REG2]], v2 + +; negtive constant + +define <16 x i8> @test4_v16i8(<16 x i8> %a) { + %tmp.1 = mul nsw <16 x i8> %a, ; <<16 x i8>> [#uses=1] + ret <16 x i8> %tmp.1 +} +; CHECK-LABEL: test4_v16i8: +; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4 +; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4 +; CHECK-NOT: vmul +; CHECK-NEXT: vslb v[[REG3:[0-9]+]], v2, v[[REG1]] +; CHECK-NEXT: xxlxor v[[REG2:[0-9]+]], +; CHECK-NEXT: vsububm v[[REG4:[0-9]+]], v[[REG2]], v[[REG3]] + +define <16 x i8> @test5_v16i8(<16 x i8> %a) { + %tmp.1 = mul nsw <16 x i8> %a, ; <<16 x i8>> [#uses=1] + ret <16 x i8> %tmp.1 +} +; CHECK-LABEL: test5_v16i8: +; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4 +; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4 +; CHECK-NOT: vmul +; CHECK-NEXT: vslb v[[REG3:[0-9]+]], v2, v[[REG1]] +; CHECK-NEXT: vaddubm v[[REG4:[0-9]+]], v2, v[[REG3]] +; CHECK-NEXT: xxlxor v[[REG2:[0-9]+]], +; CHECK-NEXT: vsububm v[[REG5:[0-9]+]], v[[REG2]], v[[REG4]] + +define <16 x i8> @test6_v16i8(<16 x i8> %a) { + %tmp.1 = mul nsw <16 x i8> %a, ; <<16 x i8>> [#uses=1] + ret <16 x i8> %tmp.1 +} +; CHECK-LABEL: test6_v16i8: +; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4 +; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4 +; CHECK-NOT: vmul +; 
CHECK-NEXT: vslb v[[REG2:[0-9]+]], v2, v[[REG1]] +; CHECK-NEXT: vsububm v[[REG3:[0-9]+]], v2, v[[REG2]] + +; boundary case + +define <16 x i8> @test7_v16i8(<16 x i8> %a) { + %tmp.1 = mul nsw <16 x i8> %a, ; <<16 x i8>> [#uses=1] + ret <16 x i8> %tmp.1 +} +; CHECK-LABEL: test7_v16i8: +; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 7 +; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 7 +; CHECK-NOT: vmul +; CHECK-NEXT: vslb v[[REG5:[0-9]+]], v2, v[[REG1]] + +define <16 x i8> @test8_v16i8(<16 x i8> %a) { + %tmp.1 = mul nsw <16 x i8> %a, ; <<16 x i8>> [#uses=1] + ret <16 x i8> %tmp.1 +} +; CHECK-LABEL: test8_v16i8: +; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 7 +; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 7 +; CHECK-NOT: vmul +; CHECK-NEXT: vslb v[[REG2:[0-9]+]], v2, v[[REG1]] +; CHECK-NEXT: vsububm v[[REG3:[0-9]+]], v[[REG2]], v2 + +define <8 x i16> @test1_v8i16(<8 x i16> %a) { + %tmp.1 = mul nsw <8 x i16> %a, ; <<8 x i16>> [#uses=1] + ret <8 x i16> %tmp.1 +} +; CHECK-LABEL: test1_v8i16: +; CHECK: vspltish v[[REG1:[0-9]+]], 4 +; CHECK-NOT: vmul +; CHECK-NEXT: vslh v[[REG2:[0-9]+]], v2, v[[REG1]] + +define <8 x i16> @test2_v8i16(<8 x i16> %a) { + %tmp.1 = mul nsw <8 x i16> %a, ; <<8 x i16>> [#uses=1] + ret <8 x i16> %tmp.1 +} +; CHECK-LABEL: test2_v8i16: +; CHECK: vspltish v[[REG1:[0-9]+]], 4 +; CHECK-NOT: vmul +; CHECK-NEXT: vslh v[[REG2:[0-9]+]], v2, v[[REG1]] +; CHECK-NEXT: vadduhm v[[REG3:[0-9]+]], v2, v[[REG2]] + +define <8 x i16> @test3_v8i16(<8 x i16> %a) { + %tmp.1 = mul nsw <8 x i16> %a, ; <<8 x i16>> [#uses=1] + ret <8 x i16> %tmp.1 +} +; CHECK-LABEL: test3_v8i16: +; CHECK: vspltish v[[REG1:[0-9]+]], 4 +; CHECK-NOT: vmul +; CHECK-NEXT: vslh v[[REG2:[0-9]+]], v2, v[[REG1]] +; CHECK-NEXT: vsubuhm v[[REG3:[0-9]+]], v[[REG2]], v2 + +; negtive constant + +define <8 x i16> @test4_v8i16(<8 x i16> %a) { + %tmp.1 = mul nsw <8 x i16> %a, ; <<8 x i16>> [#uses=1] + ret <8 x i16> %tmp.1 +} +; CHECK-LABEL: test4_v8i16: +; CHECK: vspltish v[[REG1:[0-9]+]], 4 +; CHECK-NOT: vmul +; CHECK-NEXT: vslh 
v[[REG3:[0-9]+]], v2, v[[REG1]] +; CHECK-NEXT: xxlxor v[[REG2:[0-9]+]], +; CHECK-NEXT: vsubuhm v[[REG4:[0-9]+]], v[[REG2]], v[[REG3]] + +define <8 x i16> @test5_v8i16(<8 x i16> %a) { + %tmp.1 = mul nsw <8 x i16> %a, ; <<8 x i16>> [#uses=1] + ret <8 x i16> %tmp.1 +} +; CHECK-LABEL: test5_v8i16: +; CHECK: vspltish v[[REG1:[0-9]+]], 4 +; CHECK-NOT: vmul +; CHECK-NEXT: vslh v[[REG3:[0-9]+]], v2, v[[REG1]] +; CHECK-NEXT: vadduhm v[[REG4:[0-9]+]], v2, v[[REG3]] +; CHECK-NEXT: xxlxor v[[REG2:[0-9]+]], +; CHECK-NEXT: vsubuhm v[[REG5:[0-9]+]], v[[REG2]], v[[REG4]] + +define <8 x i16> @test6_v8i16(<8 x i16> %a) { + %tmp.1 = mul nsw <8 x i16> %a, ; <<8 x i16>> [#uses=1] + ret <8 x i16> %tmp.1 +} +; CHECK-LABEL: test6_v8i16: +; CHECK: vspltish v[[REG1:[0-9]+]], 4 +; CHECK-NOT: vmul +; CHECK-NEXT: vslh v[[REG2:[0-9]+]], v2, v[[REG1]] +; CHECK-NEXT: vsubuhm v[[REG3:[0-9]+]], v2, v[[REG2]] + +; boundary case + +define <8 x i16> @test7_v8i16(<8 x i16> %a) { + %tmp.1 = mul nsw <8 x i16> %a, ; <<8 x i16>> [#uses=1] + ret <8 x i16> %tmp.1 +} +; CHECK-LABEL: test7_v8i16: +; CHECK: vspltish v[[REG1:[0-9]+]], 15 +; CHECK-NOT: vmul +; CHECK-NEXT: vslh v[[REG5:[0-9]+]], v2, v[[REG1]] + +define <8 x i16> @test8_v8i16(<8 x i16> %a) { + %tmp.1 = mul nsw <8 x i16> %a, ; <<8 x i16>> [#uses=1] + ret <8 x i16> %tmp.1 +} +; CHECK-LABEL: test8_v8i16: +; CHECK: vspltish v[[REG1:[0-9]+]], 15 +; CHECK-NOT: vmul +; CHECK-NEXT: vslh v[[REG2:[0-9]+]], v2, v[[REG1]] +; CHECK-NEXT: vsubuhm v[[REG3:[0-9]+]], v[[REG2]], v2 + +define <4 x i32> @test1_v4i32(<4 x i32> %a) { + %tmp.1 = mul nsw <4 x i32> %a, ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp.1 +} +; CHECK-LABEL: test1_v4i32: +; CHECK: vspltisw v[[REG1:[0-9]+]], 4 +; CHECK-NOT: vmul +; CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]] + +define <4 x i32> @test2_v4i32(<4 x i32> %a) { + %tmp.1 = mul nsw <4 x i32> %a, ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp.1 +} +; CHECK-LABEL: test2_v4i32: +; CHECK: vspltisw v[[REG1:[0-9]+]], 4 +; CHECK-NOT: vmul +; 
CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]] +; CHECK-NEXT: vadduwm v[[REG3:[0-9]+]], v2, v[[REG2]] + +define <4 x i32> @test3_v4i32(<4 x i32> %a) { + %tmp.1 = mul nsw <4 x i32> %a, ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp.1 +} +; CHECK-LABEL: test3_v4i32: +; CHECK: vspltisw v[[REG1:[0-9]+]], 4 +; CHECK-NOT: vmul +; CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]] +; CHECK-NEXT: vsubuwm v[[REG3:[0-9]+]], v[[REG2]], v2 + +; negtive constant + +define <4 x i32> @test4_v4i32(<4 x i32> %a) { + %tmp.1 = mul nsw <4 x i32> %a, ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp.1 +} +; CHECK-LABEL: test4_v4i32: +; CHECK: vspltisw v[[REG1:[0-9]+]], 4 +; CHECK-NOT: vmul +; CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]] +; CHECK-P8-NEXT: xxlxor v[[REG3:[0-9]+]], +; CHECK-P8-NEXT: vsubuwm v{{[0-9]+}}, v[[REG3]], v[[REG2]] +; CHECK-P9-NEXT: vnegw v{{[0-9]+}}, v[[REG2]] + +define <4 x i32> @test5_v4i32(<4 x i32> %a) { + %tmp.1 = mul nsw <4 x i32> %a, ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp.1 +} +; CHECK-LABEL: test5_v4i32: +; CHECK: vspltisw v[[REG1:[0-9]+]], 4 +; CHECK-NOT: vmul +; CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]] +; CHECK-NEXT: vadduwm v[[REG3:[0-9]+]], v2, v[[REG2]] +; CHECK-P8-NEXT: xxlxor v[[REG4:[0-9]+]], +; CHECK-P8-NEXT: vsubuwm v{{[0-9]+}}, v[[REG4]], v[[REG3]] +; CHECK-P9-NEXT: vnegw v{{[0-9]+}}, v[[REG3]] + +define <4 x i32> @test6_v4i32(<4 x i32> %a) { + %tmp.1 = mul nsw <4 x i32> %a, ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp.1 +} +; CHECK-LABEL: test6_v4i32: +; CHECK: vspltisw v[[REG1:[0-9]+]], 4 +; CHECK-NOT: vmul +; CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]] +; CHECK-NEXT: vsubuwm v[[REG3:[0-9]+]], v2, v[[REG2]] + +; boundary case + +define <4 x i32> @test7_v4i32(<4 x i32> %a) { + %tmp.1 = mul nsw <4 x i32> %a, ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp.1 +} +; CHECK-LABEL: test7_v4i32: +; CHECK-DAG: vspltisw v[[REG2:[0-9]+]], -16 +; CHECK-DAG: vspltisw v[[REG3:[0-9]+]], 15 +; CHECK-NEXT: vsubuwm v[[REG4:[0-9]+]], v[[REG3]], 
v[[REG2]] +; CHECK-NOT: vmul +; CHECK-NEXT: vslw v[[REG5:[0-9]+]], v2, v[[REG4]] + +define <4 x i32> @test8_v4i32(<4 x i32> %a) { + %tmp.1 = mul nsw <4 x i32> %a, ; <<4 x i32>> [#uses=1] + ret <4 x i32> %tmp.1 +} +; CHECK-LABEL: test8_v4i32: +; CHECK-DAG: vspltisw v[[REG2:[0-9]+]], -16 +; CHECK-DAG: vspltisw v[[REG3:[0-9]+]], 15 +; CHECK-NEXT: vsubuwm v[[REG4:[0-9]+]], v[[REG3]], v[[REG2]] +; CHECK-NOT: vmul +; CHECK-NEXT: vslw v[[REG5:[0-9]+]], v2, v[[REG4]] +; CHECK-NEXT: vsubuwm v[[REG6:[0-9]+]], v[[REG5]], v2 + +define <2 x i64> @test1_v2i64(<2 x i64> %a) { + %tmp.1 = mul nsw <2 x i64> %a, ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp.1 +} +; CHECK-LABEL: test1_v2i64: +; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}} +; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]] +; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}} +; CHECK-NOT: vmul +; CHECK-NEXT: vsld v{{[0-9]+}}, v2, v[[REG2]] + +define <2 x i64> @test2_v2i64(<2 x i64> %a) { + %tmp.1 = mul nsw <2 x i64> %a, ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp.1 +} + +; CHECK-LABEL: test2_v2i64: +; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}} +; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]] +; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}} +; CHECK-NOT: vmul +; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]] +; CHECK-NEXT: vaddudm v{{[0-9]+}}, v2, v[[REG3]] + +define <2 x i64> @test3_v2i64(<2 x i64> %a) { + %tmp.1 = mul nsw <2 x i64> %a, ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp.1 +} + +; CHECK-LABEL: test3_v2i64: +; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}} +; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]] +; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}} +; CHECK-NOT: vmul +; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]] +; CHECK-NEXT: vsubudm v{{[0-9]+}}, v[[REG3]], v2 + +; negtive constant + +define <2 x i64> @test4_v2i64(<2 x i64> %a) { + %tmp.1 = mul nsw <2 x i64> %a, ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp.1 +} + +; CHECK-LABEL: test4_v2i64: +; CHECK-P8: lxvd2x 
vs[[REG1:[0-9]+]], 0, r{{[0-9]+}} +; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]] +; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}} +; CHECK-NOT: vmul +; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]] +; CHECK-P8-NEXT: xxlxor v[[REG4:[0-9]+]], +; CHECK-P8-NEXT: vsubudm v{{[0-9]+}}, v[[REG4]], v[[REG3]] +; CHECK-P9-NEXT: vnegd v[[REG4:[0-9]+]], v[[REG3]] + +define <2 x i64> @test5_v2i64(<2 x i64> %a) { + %tmp.1 = mul nsw <2 x i64> %a, ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp.1 +} + +; CHECK-LABEL: test5_v2i64: +; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}} +; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]] +; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}} +; CHECK-NOT: vmul +; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]] +; CHECK-NEXT: vaddudm v[[REG4:[0-9]+]], v2, v[[REG3]] +; CHECK-P8-NEXT: xxlxor v[[REG5:[0-9]+]], +; CHECK-P8-NEXT: vsubudm v[[REG6:[0-9]+]], v[[REG5]], v[[REG4]] +; CHECK-P9-NEXT: vnegd v{{[0-9]+}}, v[[REG4]] + +define <2 x i64> @test6_v2i64(<2 x i64> %a) { + %tmp.1 = mul nsw <2 x i64> %a, ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp.1 +} + +; CHECK-LABEL: test6_v2i64: +; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}} +; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]] +; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}} +; CHECK-NOT: vmul +; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]] +; CHECK-NEXT: vsubudm v{{[0-9]+}}, v2, v[[REG3]] + + +; boundary case + +define <2 x i64> @test7_v2i64(<2 x i64> %a) { + %tmp.1 = mul nsw <2 x i64> %a, ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp.1 +} + +; CHECK-LABEL: test7_v2i64: +; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}} +; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]] +; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}} +; CHECK-NOT: vmul +; CHECK-NEXT: vsld v[[REG4:[0-9]+]], v2, v[[REG2]] + +define <2 x i64> @test8_v2i64(<2 x i64> %a) { + %tmp.1 = mul nsw <2 x i64> %a, ; <<2 x i64>> [#uses=1] + ret <2 x i64> %tmp.1 +} + +; CHECK-LABEL: test8_v2i64: +; 
CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}} +; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]] +; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}} +; CHECK-NOT: vmul +; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]] +; CHECK-NEXT: vsubudm v{{[0-9]+}}, v[[REG3]], v2 Index: llvm/trunk/test/CodeGen/PowerPC/mul-const.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/mul-const.ll +++ llvm/trunk/test/CodeGen/PowerPC/mul-const.ll @@ -0,0 +1,79 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=ppc64-- | FileCheck %s -check-prefixes=PWR8-CHECK,CHECK +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=ppc64le-- | FileCheck %s -check-prefixes=PWR9-CHECK,CHECK + +define i32 @test1(i32 %a) { + %tmp.1 = mul nsw i32 %a, 16 ; [#uses=1] + ret i32 %tmp.1 +} +; CHECK-LABEL: test1: +; CHECK-NOT: mul +; CHECK: slwi r[[REG1:[0-9]+]], r3, 4 + +define i32 @test2(i32 %a) { + %tmp.1 = mul nsw i32 %a, 17 ; [#uses=1] + ret i32 %tmp.1 +} +; CHECK-LABEL: test2: +; CHECK-NOT: mul +; CHECK: slwi r[[REG1:[0-9]+]], r3, 4 +; CHECK-NEXT: add r[[REG2:[0-9]+]], r3, r[[REG1]] + +define i32 @test3(i32 %a) { + %tmp.1 = mul nsw i32 %a, 15 ; [#uses=1] + ret i32 %tmp.1 +} +; CHECK-LABEL: test3: +; CHECK-NOT: mul +; CHECK: slwi r[[REG1:[0-9]+]], r3, 4 +; CHECK-NEXT: subf r[[REG2:[0-9]+]], r3, r[[REG1]] + +; negtive constant + +define i32 @test4(i32 %a) { + %tmp.1 = mul nsw i32 %a, -16 ; [#uses=1] + ret i32 %tmp.1 +} +; CHECK-LABEL: test4: +; CHECK-NOT: mul +; CHECK: slwi r[[REG1:[0-9]+]], r3, 4 +; CHECK-NEXT: neg r[[REG2:[0-9]+]], r[[REG1]] + +define i32 @test5(i32 %a) { + %tmp.1 = mul nsw i32 %a, -17 ; [#uses=1] + ret i32 %tmp.1 +} +; CHECK-LABEL: test5: +; PWR9-CHECK: mulli r[[REG1:[0-9]+]], r3, -17 +; PWR8-CHECK-NOT: mul +; PWR8-CHECK: slwi r[[REG1:[0-9]+]], r3, 4 +; PWR8-CHECK-NEXT: add r[[REG2:[0-9]+]], r3, r[[REG1]] +; 
PWR8-CHECK-NEXT: neg r{{[0-9]+}}, r[[REG2]] + +define i32 @test6(i32 %a) { + %tmp.1 = mul nsw i32 %a, -15 ; [#uses=1] + ret i32 %tmp.1 +} +; CHECK-LABEL: test6: +; CHECK-NOT: mul +; CHECK: slwi r[[REG1:[0-9]+]], r3, 4 +; CHECK-NEXT: subf r[[REG2:[0-9]+]], r[[REG1]], r3 +; CHECK-NOT: neg + +; boundary case + +define i32 @test7(i32 %a) { + %tmp.1 = mul nsw i32 %a, -2147483648 ; [#uses=1] + ret i32 %tmp.1 +} +; CHECK-LABEL: test7: +; CHECK-NOT: mul +; CHECK: slwi r[[REG1:[0-9]+]], r3, 31 + +define i32 @test8(i32 %a) { + %tmp.1 = mul nsw i32 %a, 2147483647 ; [#uses=1] + ret i32 %tmp.1 +} +; CHECK-LABEL: test8: +; CHECK-NOT: mul +; CHECK: slwi r[[REG1:[0-9]+]], r3, 31 +; CHECK-NEXT: subf r[[REG2:[0-9]+]], r3, r[[REG1]] Index: llvm/trunk/test/CodeGen/PowerPC/mul-neg-power-2.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/mul-neg-power-2.ll +++ llvm/trunk/test/CodeGen/PowerPC/mul-neg-power-2.ll @@ -1,8 +0,0 @@ -; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- | not grep mul - -define i32 @test1(i32 %a) { - %tmp.1 = mul i32 %a, -2 ; [#uses=1] - %tmp.2 = add i32 %tmp.1, 63 ; [#uses=1] - ret i32 %tmp.2 -} - Index: llvm/trunk/test/CodeGen/PowerPC/mulli64.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/mulli64.ll +++ llvm/trunk/test/CodeGen/PowerPC/mulli64.ll @@ -1,16 +0,0 @@ -; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s -target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" -target triple = "powerpc64-unknown-linux-gnu" - -define i64 @foo(i64 %a) #0 { -entry: - %mul = mul nsw i64 %a, 3 - ret i64 %mul -} - -; CHECK-LABEL: @foo -; CHECK: mulli 3, 3, 3 -; CHECK: blr - -attributes #0 = { nounwind readnone } -