Index: lib/Target/PowerPC/PPCISelLowering.h =================================================================== --- lib/Target/PowerPC/PPCISelLowering.h +++ lib/Target/PowerPC/PPCISelLowering.h @@ -918,6 +918,7 @@ SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerREM(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const; Index: lib/Target/PowerPC/PPCISelLowering.cpp =================================================================== --- lib/Target/PowerPC/PPCISelLowering.cpp +++ lib/Target/PowerPC/PPCISelLowering.cpp @@ -204,11 +204,23 @@ setOperationAction(ISD::FNEARBYINT, MVT::ppcf128, Expand); setOperationAction(ISD::FREM, MVT::ppcf128, Expand); - // PowerPC has no SREM/UREM instructions - setOperationAction(ISD::SREM, MVT::i32, Expand); - setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::SREM, MVT::i64, Expand); - setOperationAction(ISD::UREM, MVT::i64, Expand); + // PowerPC has no SREM/UREM instructions unless we are on P9 + // On P9 we may use a hardware instruction to compute the remainder. + // The instructions are not legalized directly because in the cases where the + // result of both the remainder and the division is required it is more + // efficient to compute the remainder from the result of the division rather + // than use the remainder instruction. + if (Subtarget.isISA3_0()) { + setOperationAction(ISD::SREM, MVT::i32, Custom); + setOperationAction(ISD::UREM, MVT::i32, Custom); + setOperationAction(ISD::SREM, MVT::i64, Custom); + setOperationAction(ISD::UREM, MVT::i64, Custom); + } else { + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i64, Expand); + setOperationAction(ISD::UREM, MVT::i64, Expand); + } // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM. setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); @@ -8389,6 +8401,18 @@ return SDValue(); } +SDValue PPCTargetLowering::LowerREM(SDValue Op, SelectionDAG &DAG) const { + // Check for a DIV with the same operands as this REM. + for (auto UI : Op.getOperand(1)->uses()) { + if ((Op.getOpcode() == ISD::SREM && UI->getOpcode() == ISD::SDIV) || + (Op.getOpcode() == ISD::UREM && UI->getOpcode() == ISD::UDIV)) + if (UI->getOperand(0) == Op.getOperand(0) && + UI->getOperand(1) == Op.getOperand(1)) + return SDValue(); + } + return Op; +} + SDValue PPCTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -8857,6 +8881,9 @@ case ISD::INTRINSIC_VOID: return LowerINTRINSIC_VOID(Op, DAG); + case ISD::SREM: + case ISD::UREM: + return LowerREM(Op, DAG); } } Index: lib/Target/PowerPC/PPCInstr64Bit.td =================================================================== --- lib/Target/PowerPC/PPCInstr64Bit.td +++ lib/Target/PowerPC/PPCInstr64Bit.td @@ -683,6 +683,16 @@ "divde $rT, $rA, $rB", IIC_IntDivD, [(set i64:$rT, (int_ppc_divde g8rc:$rA, g8rc:$rB))]>, isPPC64, Requires<[HasExtDiv]>; + +let Predicates = [IsISA3_0] in { +def MODSD : XForm_8<31, 777, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "modsd $rT, $rA, $rB", IIC_IntDivW, + [(set i64:$rT, (srem i64:$rA, i64:$rB))]>; +def MODUD : XForm_8<31, 265, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), + "modud $rT, $rA, $rB", IIC_IntDivW, + [(set i64:$rT, (urem i64:$rA, i64:$rB))]>; +} + let Defs = [CR0] in def DIVDEo : XOForm_1<31, 425, 0, (outs g8rc:$rT), (ins g8rc:$rA, g8rc:$rB), "divde. $rT, $rA, $rB", IIC_IntDivD, Index: lib/Target/PowerPC/PPCInstrInfo.td =================================================================== --- lib/Target/PowerPC/PPCInstrInfo.td +++ lib/Target/PowerPC/PPCInstrInfo.td @@ -2544,6 +2544,14 @@ "mffs. $rT", IIC_IntMFFS, []>, isDOT; } +let Predicates = [IsISA3_0] in { +def MODSW : XForm_8<31, 779, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "modsw $rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (srem i32:$rA, i32:$rB))]>; +def MODUW : XForm_8<31, 267, (outs gprc:$rT), (ins gprc:$rA, gprc:$rB), + "moduw $rT, $rA, $rB", IIC_IntDivW, + [(set i32:$rT, (urem i32:$rA, i32:$rB))]>; +} let PPC970_Unit = 1, hasSideEffects = 0 in { // FXU Operations. // XO-Form instructions. Arithmetic instructions that can set overflow bit Index: test/CodeGen/PowerPC/ppc64-P9-mod.ll =================================================================== --- /dev/null +++ test/CodeGen/PowerPC/ppc64-P9-mod.ll @@ -0,0 +1,263 @@ +; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr9 -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 | FileCheck %s -check-prefix=CHECK-PWR8 -implicit-check-not mod[us][wd] + +@mod_resultsw = common local_unnamed_addr global i32 0, align 4 +@mod_resultud = common local_unnamed_addr global i64 0, align 8 +@div_resultsw = common local_unnamed_addr global i32 0, align 4 +@mod_resultuw = common local_unnamed_addr global i32 0, align 4 +@div_resultuw = common local_unnamed_addr global i32 0, align 4 +@div_resultsd = common local_unnamed_addr global i64 0, align 8 +@mod_resultsd = common local_unnamed_addr global i64 0, align 8 +@div_resultud = common local_unnamed_addr global i64 0, align 8 + +; Function Attrs: norecurse nounwind +define void @modulo_sw(i32 signext %a, i32 signext %b) local_unnamed_addr { +entry: + %rem = srem i32 %a, %b + store i32 %rem, i32* @mod_resultsw, align 4 + ret void +; CHECK-LABEL: modulo_sw +; CHECK: modsw {{[0-9]+}}, 3, 4 +; CHECK: blr +; CHECK-PWR8-LABEL: modulo_sw +; CHECK-PWR8: div +; CHECK-PWR8: mull +; CHECK-PWR8: sub +; CHECK-PWR8: blr +} + +; Function Attrs: norecurse nounwind readnone +define zeroext i32 @modulo_uw(i32 zeroext %a, i32 zeroext %b) local_unnamed_addr { +entry: + %rem = urem i32 %a, %b + ret i32 %rem +; CHECK-LABEL: modulo_uw +; CHECK: moduw {{[0-9]+}}, 3, 4 +; CHECK: blr +; CHECK-PWR8-LABEL: modulo_uw +; CHECK-PWR8: div +; CHECK-PWR8: mull +; CHECK-PWR8: sub +; CHECK-PWR8: blr +} + +; Function Attrs: norecurse nounwind readnone +define i64 @modulo_sd(i64 %a, i64 %b) local_unnamed_addr { +entry: + %rem = srem i64 %a, %b + ret i64 %rem +; CHECK-LABEL: modulo_sd +; CHECK: modsd {{[0-9]+}}, 3, 4 +; CHECK: blr +; CHECK-PWR8-LABEL: modulo_sd +; CHECK-PWR8: div +; CHECK-PWR8: mull +; CHECK-PWR8: sub +; CHECK-PWR8: blr +} + +; Function Attrs: norecurse nounwind +define void @modulo_ud(i64 %a, i64 %b) local_unnamed_addr { +entry: + %rem = urem i64 %a, %b + store i64 %rem, i64* @mod_resultud, align 8 + ret void +; CHECK-LABEL: modulo_ud +; CHECK: modud {{[0-9]+}}, 3, 4 +; CHECK: blr +; CHECK-PWR8-LABEL: modulo_ud +; CHECK-PWR8: div +; CHECK-PWR8: mull +; CHECK-PWR8: sub +; CHECK-PWR8: blr +} + +; Function Attrs: norecurse nounwind +define void @modulo_div_sw(i32 signext %a, i32 signext %b) local_unnamed_addr { +entry: + %rem = srem i32 %a, %b + store i32 %rem, i32* @mod_resultsw, align 4 + %div = sdiv i32 %a, %b + store i32 %div, i32* @div_resultsw, align 4 + ret void +; CHECK-LABEL: modulo_div_sw +; CHECK-NOT: modsw +; CHECK: div +; CHECK-NOT: modsw +; CHECK: mull +; CHECK-NOT: modsw +; CHECK: sub +; CHECK: blr +; CHECK-PWR8-LABEL: modulo_div_sw +; CHECK-PWR8: div +; CHECK-PWR8: mull +; CHECK-PWR8: sub +; CHECK-PWR8: blr +} + +; Function Attrs: norecurse nounwind +define void @modulo_div_abc_sw(i32 signext %a, i32 signext %b, i32 signext %c) local_unnamed_addr { +entry: + %rem = srem i32 %a, %c + store i32 %rem, i32* @mod_resultsw, align 4 + %div = sdiv i32 %b, %c + store i32 %div, i32* @div_resultsw, align 4 + ret void +; CHECK-LABEL: modulo_div_abc_sw +; CHECK: modsw {{[0-9]+}}, 3, 5 +; CHECK: blr +; CHECK-PWR8-LABEL: modulo_div_abc_sw +; CHECK-PWR8: div +; CHECK-PWR8: mull +; CHECK-PWR8: sub +; CHECK-PWR8: blr +} + +; Function Attrs: norecurse nounwind +define void @modulo_div_uw(i32 zeroext %a, i32 zeroext %b) local_unnamed_addr { +entry: + %rem = urem i32 %a, %b + store i32 %rem, i32* @mod_resultuw, align 4 + %div = udiv i32 %a, %b + store i32 %div, i32* @div_resultuw, align 4 + ret void +; CHECK-LABEL: modulo_div_uw +; CHECK-NOT: modsw +; CHECK: div +; CHECK-NOT: modsw +; CHECK: mull +; CHECK-NOT: modsw +; CHECK: sub +; CHECK: blr +; CHECK-PWR8-LABEL: modulo_div_uw +; CHECK-PWR8: div +; CHECK-PWR8: mull +; CHECK-PWR8: sub +; CHECK-PWR8: blr +} + +; Function Attrs: norecurse nounwind +define void @modulo_div_swuw(i32 signext %a, i32 signext %b) local_unnamed_addr { +entry: + %rem = srem i32 %a, %b + store i32 %rem, i32* @mod_resultsw, align 4 + %div = udiv i32 %a, %b + store i32 %div, i32* @div_resultsw, align 4 + ret void +; CHECK-LABEL: modulo_div_swuw +; CHECK: modsw {{[0-9]+}}, 3, 4 +; CHECK: blr +; CHECK-PWR8-LABEL: modulo_div_swuw +; CHECK-PWR8: div +; CHECK-PWR8: mull +; CHECK-PWR8: sub +; CHECK-PWR8: blr +} + +; Function Attrs: norecurse nounwind +define void @modulo_div_udsd(i64 %a, i64 %b) local_unnamed_addr { +entry: + %rem = urem i64 %a, %b + store i64 %rem, i64* @mod_resultud, align 8 + %div = sdiv i64 %a, %b + store i64 %div, i64* @div_resultsd, align 8 + ret void +; CHECK-LABEL: modulo_div_udsd +; CHECK: modud {{[0-9]+}}, 3, 4 +; CHECK: blr +; CHECK-PWR8-LABEL: modulo_div_udsd +; CHECK-PWR8: div +; CHECK-PWR8: mull +; CHECK-PWR8: sub +; CHECK-PWR8: blr +} + +; Function Attrs: norecurse nounwind +define void @modulo_const32_sw(i32 signext %a) local_unnamed_addr { +entry: + %rem = srem i32 %a, 32 + store i32 %rem, i32* @mod_resultsw, align 4 + ret void +; CHECK-LABEL: modulo_const32_sw +; CHECK-NOT: modsw +; CHECK: srawi +; CHECK-NOT: modsw +; CHECK: addze +; CHECK-NOT: modsw +; CHECK: slwi +; CHECK-NOT: modsw +; CHECK: subf +; CHECK-NOT: modsw +; CHECK: blr +; CHECK-PWR8-LABEL: modulo_const32_sw +; CHECK-PWR8: srawi +; CHECK-PWR8: addze +; CHECK-PWR8: slwi +; CHECK-PWR8: subf +; CHECK-PWR8: blr +} + +; Function Attrs: norecurse nounwind readnone +define signext i32 @modulo_const3_sw(i32 signext %a) local_unnamed_addr { +entry: + %rem = srem i32 %a, 3 + ret i32 %rem +; CHECK-LABEL: modulo_const3_sw +; CHECK-NOT: modsw +; CHECK: mull +; CHECK-NOT: modsw +; CHECK: sub +; CHECK-NOT: modsw +; CHECK: blr +; CHECK-PWR8-LABEL: modulo_const3_sw +; CHECK-PWR8: mull +; CHECK-PWR8: sub +; CHECK-PWR8: blr +} + +; Function Attrs: norecurse nounwind readnone +define signext i32 @const2_modulo_sw(i32 signext %a) local_unnamed_addr { +entry: + %rem = srem i32 2, %a + ret i32 %rem +; CHECK-LABEL: const2_modulo_sw +; CHECK: modsw {{[0-9]+}}, {{[0-9]+}}, 3 +; CHECK: blr +; CHECK-PWR8-LABEL: const2_modulo_sw +; CHECK-PWR8: div +; CHECK-PWR8: mull +; CHECK-PWR8: sub +; CHECK-PWR8: blr +} + +; Function Attrs: norecurse nounwind +; FIXME On power 9 this test will still produce modsw because the divide is in +; a different block than the remainder. Due to the nature of the SDAG we cannot +; see the div in the other block. +define void @blocks_modulo_div_sw(i32 signext %a, i32 signext %b, i32 signext %c) local_unnamed_addr { +entry: + %div = sdiv i32 %a, %b + store i32 %div, i32* @div_resultsw, align 4 + %cmp = icmp sgt i32 %c, 0 + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %rem = srem i32 %a, %b + store i32 %rem, i32* @mod_resultsw, align 4 + br label %if.end + +if.end: ; preds = %if.then, %entry + ret void +; CHECK-LABEL: blocks_modulo_div_sw +; CHECK: div +; CHECK: modsw {{[0-9]+}}, 3, 4 +; CHECK: blr +; CHECK-PWR8-LABEL: blocks_modulo_div_sw +; CHECK-PWR8: div +; CHECK-PWR8: mull +; CHECK-PWR8: sub +; CHECK-PWR8: blr +} + + Index: test/MC/Disassembler/PowerPC/ppc64-encoding.txt =================================================================== --- test/MC/Disassembler/PowerPC/ppc64-encoding.txt +++ test/MC/Disassembler/PowerPC/ppc64-encoding.txt @@ -352,6 +352,18 @@ # CHECK: divweu. 2, 3, 4 0x7c 0x43 0x23 0x17 +# CHECK: modsw 2, 3, 4 +0x7c 0x43 0x26 0x16 + +# CHECK: moduw 2, 3, 4 +0x7c 0x43 0x22 0x16 + +# CHECK: modsd 2, 3, 4 +0x7c 0x43 0x26 0x12 + +# CHECK: modud 2, 3, 4 +0x7c 0x43 0x22 0x12 + # CHECK: mulld 2, 3, 4 0x7c 0x43 0x21 0xd2 Index: test/MC/Disassembler/PowerPC/ppc64le-encoding.txt =================================================================== --- test/MC/Disassembler/PowerPC/ppc64le-encoding.txt +++ test/MC/Disassembler/PowerPC/ppc64le-encoding.txt @@ -349,6 +349,18 @@ # CHECK: divweu. 2, 3, 4 0x17 0x23 0x43 0x7c +# CHECK: modsw 2, 3, 4 +0x16 0x26 0x43 0x7c + +# CHECK: moduw 2, 3, 4 +0x16 0x22 0x43 0x7c + +# CHECK: modsd 2, 3, 4 +0x12 0x26 0x43 0x7c + +# CHECK: modud 2, 3, 4 +0x12 0x22 0x43 0x7c + # CHECK: mulld 2, 3, 4 0xd2 0x21 0x43 0x7c Index: test/MC/PowerPC/ppc64-encoding.s =================================================================== --- test/MC/PowerPC/ppc64-encoding.s +++ test/MC/PowerPC/ppc64-encoding.s @@ -493,6 +493,19 @@ # FIXME: divweuo 2, 3, 4 # FIXME: divweuo. 2, 3, 4 +# CHECK-BE: modsw 2, 3, 4 # encoding: [0x7c,0x43,0x26,0x16] +# CHECK-LE: modsw 2, 3, 4 # encoding: [0x16,0x26,0x43,0x7c] + modsw 2, 3, 4 +# CHECK-BE: moduw 2, 3, 4 # encoding: [0x7c,0x43,0x22,0x16] +# CHECK-LE: moduw 2, 3, 4 # encoding: [0x16,0x22,0x43,0x7c] + moduw 2, 3, 4 +# CHECK-BE: modsd 2, 3, 4 # encoding: [0x7c,0x43,0x26,0x12] +# CHECK-LE: modsd 2, 3, 4 # encoding: [0x12,0x26,0x43,0x7c] + modsd 2, 3, 4 +# CHECK-BE: modud 2, 3, 4 # encoding: [0x7c,0x43,0x22,0x12] +# CHECK-LE: modud 2, 3, 4 # encoding: [0x12,0x22,0x43,0x7c] + modud 2, 3, 4 + # CHECK-BE: mulld 2, 3, 4 # encoding: [0x7c,0x43,0x21,0xd2] # CHECK-LE: mulld 2, 3, 4 # encoding: [0xd2,0x21,0x43,0x7c] mulld 2, 3, 4