diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -1652,6 +1652,10 @@ virtual bool isJumpTableRelative() const; + /// Return true if a mulh[s|u] node for a specific type is cheaper than + /// a multiply followed by a shift. This is false by default. + virtual bool isMulhCheaperThanMulShift(EVT Type) const { return false; } + /// If a physical register, this specifies the register that /// llvm.savestack/llvm.restorestack should save and restore. unsigned getStackPointerRegisterToSaveRestore() const { diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4118,7 +4118,7 @@ // If the type twice as wide is legal, transform the mulhs to a wider multiply // plus a shift. - if (VT.isSimple() && !VT.isVector()) { + if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); @@ -4174,7 +4174,7 @@ // If the type twice as wide is legal, transform the mulhu to a wider multiply // plus a shift. - if (VT.isSimple() && !VT.isVector()) { + if (!TLI.isMulhCheaperThanMulShift(VT) && VT.isSimple() && !VT.isVector()) { MVT Simple = VT.getSimpleVT(); unsigned SimpleSize = Simple.getSizeInBits(); EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h --- a/llvm/lib/Target/PowerPC/PPCISelLowering.h +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h @@ -950,6 +950,11 @@ Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override; + /// isMulhCheaperThanMulShift - Return true if a mulh[s|u] node for a + /// specific type is cheaper than a multiply followed by a shift. + /// This is true for words and doublewords on 64-bit PowerPC. + bool isMulhCheaperThanMulShift(EVT Type) const override; + /// Override to support customized stack guard loading. bool useLoadStackGuardNode() const override; void insertSSPDeclarations(Module &M) const override; diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1401,6 +1401,16 @@ return VT.isScalarInteger(); } +/// isMulhCheaperThanMulShift - Return true if a mulh[s|u] node for a specific +/// type is cheaper than a multiply followed by a shift. +/// This is true for words and doublewords on 64-bit PowerPC. +bool PPCTargetLowering::isMulhCheaperThanMulShift(EVT Type) const { + if (Subtarget.isPPC64() && (isOperationLegal(ISD::MULHS, Type) || + isOperationLegal(ISD::MULHU, Type))) + return true; + return TargetLowering::isMulhCheaperThanMulShift(Type); +} + const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { switch ((PPCISD::NodeType)Opcode) { case PPCISD::FIRST_NUMBER: break; diff --git a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll --- a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll +++ b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll @@ -509,10 +509,9 @@ ; CHECK-NEXT: bdz .LBB6_9 ; CHECK-NEXT: .LBB6_4: # ; CHECK-NEXT: lbzu r0, 1(r5) -; CHECK-NEXT: clrldi r27, r0, 32 -; CHECK-NEXT: mulld r27, r27, r4 -; CHECK-NEXT: rldicl r27, r27, 31, 33 -; CHECK-NEXT: slwi r26, r27, 1 +; CHECK-NEXT: mulhwu r27, r0, r4 +; CHECK-NEXT: rlwinm r26, r27, 0, 0, 30 +; CHECK-NEXT: srwi r27, r27, 1 ; CHECK-NEXT: add r27, r27, r26 ; CHECK-NEXT: subf r0, r27, r0 ; CHECK-NEXT: cmplwi r0, 1 diff --git a/llvm/test/CodeGen/PowerPC/machine-pre.ll b/llvm/test/CodeGen/PowerPC/machine-pre.ll --- a/llvm/test/CodeGen/PowerPC/machine-pre.ll +++ b/llvm/test/CodeGen/PowerPC/machine-pre.ll @@ -91,14 +91,12 @@ ; CHECK-P9-NEXT: bl bar ; CHECK-P9-NEXT: nop ; CHECK-P9-NEXT: mr r30, r3 -; CHECK-P9-NEXT: extsw r3, r28 -; CHECK-P9-NEXT: mulld r4, r3, r27 -; CHECK-P9-NEXT: rldicl r5, r4, 1, 63 -; CHECK-P9-NEXT: rldicl r4, r4, 32, 32 -; CHECK-P9-NEXT: add r4, r4, r5 -; CHECK-P9-NEXT: slwi r5, r4, 1 -; CHECK-P9-NEXT: add r4, r4, r5 -; CHECK-P9-NEXT: subf r3, r4, r3 +; CHECK-P9-NEXT: mulhw r3, r28, r27 +; CHECK-P9-NEXT: srwi r4, r3, 31 +; CHECK-P9-NEXT: add r3, r3, r4 +; CHECK-P9-NEXT: slwi r4, r3, 1 +; CHECK-P9-NEXT: add r3, r3, r4 +; CHECK-P9-NEXT: subf r3, r3, r28 ; CHECK-P9-NEXT: cmplwi r3, 1 ; CHECK-P9-NEXT: beq cr0, .LBB1_1 ; CHECK-P9-NEXT: # %bb.5: # %while.cond diff --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-mod.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-mod.ll --- a/llvm/test/CodeGen/PowerPC/ppc64-P9-mod.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-mod.ll @@ -205,13 +205,13 @@ ret i32 %rem ; CHECK-LABEL: modulo_const3_sw ; CHECK-NOT: modsw -; CHECK: mull +; CHECK: mulh ; CHECK-NOT: modsw ; CHECK: sub ; CHECK-NOT: modsw ; CHECK: blr ; CHECK-PWR8-LABEL: modulo_const3_sw -; CHECK-PWR8: mull +; CHECK-PWR8: mulh ; CHECK-PWR8: sub ; CHECK-PWR8: blr } diff --git a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll --- a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll @@ -13,12 +13,10 @@ ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r4, r3 ; P9LE-NEXT: lis r5, -21386 ; P9LE-NEXT: ori r5, r5, 37253 -; P9LE-NEXT: extsw r4, r4 -; P9LE-NEXT: mulld r5, r4, r5 -; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: mulhw r5, r4, r5 ; P9LE-NEXT: add r4, r5, r4 ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 6 @@ -30,10 +28,8 @@ ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: extsw r4, r4 ; P9LE-NEXT: ori r5, r5, 63421 -; P9LE-NEXT: mulld r5, r4, r5 -; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: mulhw r5, r4, r5 ; P9LE-NEXT: subf r4, r4, r5 ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 6 @@ -46,11 +42,9 @@ ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: extsw r4, r4 ; P9LE-NEXT: ori r5, r5, 33437 -; P9LE-NEXT: mulld r4, r4, r5 -; P9LE-NEXT: rldicl r5, r4, 1, 63 -; P9LE-NEXT: rldicl r4, r4, 32, 32 +; P9LE-NEXT: mulhw r4, r4, r5 +; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 5 ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: lis r5, -16728 @@ -61,11 +55,9 @@ ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: extsw r4, r4 ; P9LE-NEXT: ori r5, r5, 63249 -; P9LE-NEXT: mulld r4, r4, r5 -; P9LE-NEXT: rldicl r5, r4, 1, 63 -; P9LE-NEXT: rldicl r4, r4, 32, 32 +; P9LE-NEXT: mulhw r4, r4, r5 +; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 8 ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, -1003 @@ -82,12 +74,10 @@ ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 ; P9BE-NEXT: lis r4, 31710 ; P9BE-NEXT: ori r4, r4, 63421 -; P9BE-NEXT: extsw r3, r3 -; P9BE-NEXT: mulld r4, r3, r4 -; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: subf r4, r3, r4 ; P9BE-NEXT: srwi r5, r4, 31 ; P9BE-NEXT: srawi r4, r4, 6 @@ -100,10 +90,8 @@ ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: extsw r3, r3 ; P9BE-NEXT: ori r4, r4, 37253 -; P9BE-NEXT: mulld r4, r3, r4 -; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: add r4, r4, r3 ; P9BE-NEXT: srwi r5, r4, 31 ; P9BE-NEXT: srawi r4, r4, 6 @@ -116,11 +104,9 @@ ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: extsw r3, r3 ; P9BE-NEXT: ori r4, r4, 63249 -; P9BE-NEXT: mulld r4, r3, r4 -; P9BE-NEXT: rldicl r5, r4, 1, 63 -; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: mulhw r4, r3, r4 +; P9BE-NEXT: srwi r5, r4, 31 ; P9BE-NEXT: srawi r4, r4, 8 ; P9BE-NEXT: add r4, r4, r5 ; P9BE-NEXT: mulli r4, r4, -1003 @@ -132,11 +118,9 @@ ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: extsw r3, r3 ; P9BE-NEXT: ori r4, r4, 33437 -; P9BE-NEXT: mulld r4, r3, r4 -; P9BE-NEXT: rldicl r5, r4, 1, 63 -; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: mulhw r4, r3, r4 +; P9BE-NEXT: srwi r5, r4, 31 ; P9BE-NEXT: srawi r4, r4, 5 ; P9BE-NEXT: add r4, r4, r5 ; P9BE-NEXT: mulli r4, r4, 98 @@ -150,61 +134,51 @@ ; P8LE-LABEL: fold_srem_vec_1: ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 -; P8LE-NEXT: lis r4, 21399 -; P8LE-NEXT: lis r9, -16728 -; P8LE-NEXT: lis r11, -21386 -; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; P8LE-NEXT: ori r4, r4, 33437 -; P8LE-NEXT: ori r9, r9, 63249 -; P8LE-NEXT: ori r11, r11, 37253 -; P8LE-NEXT: mffprd r5, f0 -; P8LE-NEXT: rldicl r3, r5, 32, 48 -; P8LE-NEXT: rldicl r6, r5, 16, 48 -; P8LE-NEXT: clrldi r7, r5, 48 -; P8LE-NEXT: extsh r8, r3 -; P8LE-NEXT: extsh r10, r6 -; P8LE-NEXT: rldicl r5, r5, 48, 48 -; P8LE-NEXT: extsw r8, r8 -; P8LE-NEXT: extsh r12, r7 -; P8LE-NEXT: extsw r10, r10 -; P8LE-NEXT: mulld r4, r8, r4 -; P8LE-NEXT: lis r8, 31710 -; P8LE-NEXT: extsh r0, r5 -; P8LE-NEXT: extsw r12, r12 -; P8LE-NEXT: mulld r9, r10, r9 -; P8LE-NEXT: ori r8, r8, 63421 -; P8LE-NEXT: extsw r10, r0 -; P8LE-NEXT: mulld r11, r12, r11 -; P8LE-NEXT: mulld r8, r10, r8 -; P8LE-NEXT: rldicl r0, r4, 1, 63 -; P8LE-NEXT: rldicl r4, r4, 32, 32 -; P8LE-NEXT: rldicl r30, r9, 1, 63 -; P8LE-NEXT: rldicl r9, r9, 32, 32 -; P8LE-NEXT: rldicl r11, r11, 32, 32 -; P8LE-NEXT: rldicl r8, r8, 32, 32 -; P8LE-NEXT: add r11, r11, r12 -; P8LE-NEXT: srawi r4, r4, 5 -; P8LE-NEXT: subf r8, r10, r8 -; P8LE-NEXT: srawi r9, r9, 8 -; P8LE-NEXT: srwi r10, r11, 31 -; P8LE-NEXT: add r4, r4, r0 -; P8LE-NEXT: srawi r11, r11, 6 -; P8LE-NEXT: add r9, r9, r30 -; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; P8LE-NEXT: add r10, r11, r10 -; P8LE-NEXT: srwi r11, r8, 31 -; P8LE-NEXT: srawi r8, r8, 6 -; P8LE-NEXT: mulli r4, r4, 98 -; P8LE-NEXT: mulli r9, r9, -1003 -; P8LE-NEXT: add r8, r8, r11 -; P8LE-NEXT: mulli r10, r10, 95 -; P8LE-NEXT: mulli r8, r8, -124 -; P8LE-NEXT: subf r3, r4, r3 -; P8LE-NEXT: subf r4, r9, r6 +; P8LE-NEXT: lis r3, 21399 +; P8LE-NEXT: lis r9, -21386 +; P8LE-NEXT: lis r11, 31710 +; P8LE-NEXT: lis r8, -16728 +; P8LE-NEXT: ori r3, r3, 33437 +; P8LE-NEXT: ori r9, r9, 37253 +; P8LE-NEXT: ori r8, r8, 63249 +; P8LE-NEXT: mffprd r4, f0 +; P8LE-NEXT: rldicl r5, r4, 32, 48 +; P8LE-NEXT: clrldi r7, r4, 48 +; P8LE-NEXT: rldicl r6, r4, 16, 48 +; P8LE-NEXT: rldicl r4, r4, 48, 48 +; P8LE-NEXT: extsh r10, r5 +; P8LE-NEXT: extsh r0, r7 +; P8LE-NEXT: mulhw r3, r10, r3 +; P8LE-NEXT: ori r10, r11, 63421 +; P8LE-NEXT: extsh r11, r4 +; P8LE-NEXT: extsh r12, r6 +; P8LE-NEXT: mulhw r9, r0, r9 +; P8LE-NEXT: mulhw r10, r11, r10 +; P8LE-NEXT: mulhw r8, r12, r8 +; P8LE-NEXT: srwi r12, r3, 31 +; P8LE-NEXT: srawi r3, r3, 5 +; P8LE-NEXT: add r9, r9, r0 +; P8LE-NEXT: subf r10, r11, r10 +; P8LE-NEXT: add r3, r3, r12 +; P8LE-NEXT: srwi r11, r9, 31 +; P8LE-NEXT: srawi r9, r9, 6 +; P8LE-NEXT: srwi r12, r8, 31 +; P8LE-NEXT: srawi r8, r8, 8 +; P8LE-NEXT: add r9, r9, r11 +; P8LE-NEXT: srwi r11, r10, 31 +; P8LE-NEXT: srawi r10, r10, 6 +; P8LE-NEXT: add r8, r8, r12 +; P8LE-NEXT: mulli r3, r3, 98 +; P8LE-NEXT: add r10, r10, r11 +; P8LE-NEXT: mulli r8, r8, -1003 +; P8LE-NEXT: mulli r9, r9, 95 +; P8LE-NEXT: mulli r10, r10, -124 +; P8LE-NEXT: subf r3, r3, r5 +; P8LE-NEXT: subf r5, r8, r6 ; P8LE-NEXT: mtfprd f0, r3 -; P8LE-NEXT: subf r3, r10, r7 -; P8LE-NEXT: mtfprd f1, r4 -; P8LE-NEXT: subf r4, r8, r5 +; P8LE-NEXT: subf r3, r9, r7 +; P8LE-NEXT: subf r4, r10, r4 +; P8LE-NEXT: mtfprd f1, r5 ; P8LE-NEXT: mtfprd f2, r3 ; P8LE-NEXT: xxswapd v2, vs0 ; P8LE-NEXT: mtfprd f3, r4 @@ -220,42 +194,34 @@ ; P8BE: # %bb.0: ; P8BE-NEXT: mfvsrd r4, v2 ; P8BE-NEXT: lis r3, -16728 -; P8BE-NEXT: lis r9, 31710 ; P8BE-NEXT: lis r8, 21399 +; P8BE-NEXT: lis r9, 31710 ; P8BE-NEXT: lis r10, -21386 ; P8BE-NEXT: ori r3, r3, 63249 -; P8BE-NEXT: ori r9, r9, 63421 ; P8BE-NEXT: ori r8, r8, 33437 +; P8BE-NEXT: ori r9, r9, 63421 ; P8BE-NEXT: ori r10, r10, 37253 ; P8BE-NEXT: clrldi r5, r4, 48 -; P8BE-NEXT: rldicl r7, r4, 32, 48 ; P8BE-NEXT: rldicl r6, r4, 48, 48 -; P8BE-NEXT: rldicl r4, r4, 16, 48 +; P8BE-NEXT: rldicl r7, r4, 32, 48 ; P8BE-NEXT: extsh r5, r5 -; P8BE-NEXT: extsh r7, r7 ; P8BE-NEXT: extsh r6, r6 -; P8BE-NEXT: extsw r5, r5 +; P8BE-NEXT: rldicl r4, r4, 16, 48 +; P8BE-NEXT: extsh r7, r7 +; P8BE-NEXT: mulhw r3, r5, r3 ; P8BE-NEXT: extsh r4, r4 -; P8BE-NEXT: extsw r7, r7 -; P8BE-NEXT: extsw r6, r6 -; P8BE-NEXT: mulld r3, r5, r3 -; P8BE-NEXT: extsw r4, r4 -; P8BE-NEXT: mulld r9, r7, r9 -; P8BE-NEXT: mulld r8, r6, r8 -; P8BE-NEXT: mulld r10, r4, r10 -; P8BE-NEXT: rldicl r11, r3, 1, 63 -; P8BE-NEXT: rldicl r3, r3, 32, 32 -; P8BE-NEXT: rldicl r9, r9, 32, 32 -; P8BE-NEXT: rldicl r12, r8, 1, 63 -; P8BE-NEXT: rldicl r8, r8, 32, 32 -; P8BE-NEXT: rldicl r10, r10, 32, 32 -; P8BE-NEXT: subf r9, r7, r9 +; P8BE-NEXT: mulhw r8, r6, r8 +; P8BE-NEXT: mulhw r9, r7, r9 +; P8BE-NEXT: mulhw r10, r4, r10 +; P8BE-NEXT: srwi r11, r3, 31 ; P8BE-NEXT: srawi r3, r3, 8 +; P8BE-NEXT: add r3, r3, r11 +; P8BE-NEXT: srwi r11, r8, 31 +; P8BE-NEXT: subf r9, r7, r9 ; P8BE-NEXT: srawi r8, r8, 5 ; P8BE-NEXT: add r10, r10, r4 -; P8BE-NEXT: add r3, r3, r11 +; P8BE-NEXT: add r8, r8, r11 ; P8BE-NEXT: srwi r11, r9, 31 -; P8BE-NEXT: add r8, r8, r12 ; P8BE-NEXT: srawi r9, r9, 6 ; P8BE-NEXT: mulli r3, r3, -1003 ; P8BE-NEXT: add r9, r9, r11 @@ -290,12 +256,10 @@ ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r4, r3 ; P9LE-NEXT: lis r5, -21386 ; P9LE-NEXT: ori r5, r5, 37253 -; P9LE-NEXT: extsw r4, r4 -; P9LE-NEXT: mulld r6, r4, r5 -; P9LE-NEXT: rldicl r6, r6, 32, 32 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: mulhw r6, r4, r5 ; P9LE-NEXT: add r4, r6, r4 ; P9LE-NEXT: srwi r6, r4, 31 ; P9LE-NEXT: srawi r4, r4, 6 @@ -306,9 +270,7 @@ ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: extsw r4, r4 -; P9LE-NEXT: mulld r6, r4, r5 -; P9LE-NEXT: rldicl r6, r6, 32, 32 +; P9LE-NEXT: mulhw r6, r4, r5 ; P9LE-NEXT: add r4, r6, r4 ; P9LE-NEXT: srwi r6, r4, 31 ; P9LE-NEXT: srawi r4, r4, 6 @@ -320,9 +282,7 @@ ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: extsw r4, r4 -; P9LE-NEXT: mulld r6, r4, r5 -; P9LE-NEXT: rldicl r6, r6, 32, 32 +; P9LE-NEXT: mulhw r6, r4, r5 ; P9LE-NEXT: add r4, r6, r4 ; P9LE-NEXT: srwi r6, r4, 31 ; P9LE-NEXT: srawi r4, r4, 6 @@ -334,9 +294,7 @@ ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: extsw r4, r4 -; P9LE-NEXT: mulld r5, r4, r5 -; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: mulhw r5, r4, r5 ; P9LE-NEXT: add r4, r5, r4 ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 6 @@ -355,12 +313,10 @@ ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 ; P9BE-NEXT: lis r4, -21386 ; P9BE-NEXT: ori r4, r4, 37253 -; P9BE-NEXT: extsw r3, r3 -; P9BE-NEXT: mulld r5, r3, r4 -; P9BE-NEXT: rldicl r5, r5, 32, 32 +; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: mulhw r5, r3, r4 ; P9BE-NEXT: add r5, r5, r3 ; P9BE-NEXT: srwi r6, r5, 31 ; P9BE-NEXT: srawi r5, r5, 6 @@ -372,9 +328,7 @@ ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: extsw r3, r3 -; P9BE-NEXT: mulld r5, r3, r4 -; P9BE-NEXT: rldicl r5, r5, 32, 32 +; P9BE-NEXT: mulhw r5, r3, r4 ; P9BE-NEXT: add r5, r5, r3 ; P9BE-NEXT: srwi r6, r5, 31 ; P9BE-NEXT: srawi r5, r5, 6 @@ -386,9 +340,7 @@ ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: extsw r3, r3 -; P9BE-NEXT: mulld r5, r3, r4 -; P9BE-NEXT: rldicl r5, r5, 32, 32 +; P9BE-NEXT: mulhw r5, r3, r4 ; P9BE-NEXT: add r5, r5, r3 ; P9BE-NEXT: srwi r6, r5, 31 ; P9BE-NEXT: srawi r5, r5, 6 @@ -401,9 +353,7 @@ ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: extsw r3, r3 -; P9BE-NEXT: mulld r4, r3, r4 -; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: add r4, r4, r3 ; P9BE-NEXT: srwi r5, r4, 31 ; P9BE-NEXT: srawi r4, r4, 6 @@ -419,64 +369,56 @@ ; P8LE-LABEL: fold_srem_vec_2: ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 -; P8LE-NEXT: lis r4, -21386 +; P8LE-NEXT: lis r3, -21386 ; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; P8LE-NEXT: ori r4, r4, 37253 -; P8LE-NEXT: mffprd r5, f0 -; P8LE-NEXT: clrldi r3, r5, 48 -; P8LE-NEXT: rldicl r7, r5, 32, 48 -; P8LE-NEXT: extsh r8, r3 -; P8LE-NEXT: rldicl r6, r5, 48, 48 -; P8LE-NEXT: extsh r10, r7 -; P8LE-NEXT: rldicl r5, r5, 16, 48 -; P8LE-NEXT: extsw r8, r8 +; P8LE-NEXT: ori r3, r3, 37253 +; P8LE-NEXT: mffprd r4, f0 +; P8LE-NEXT: clrldi r5, r4, 48 +; P8LE-NEXT: rldicl r6, r4, 48, 48 +; P8LE-NEXT: extsh r8, r5 +; P8LE-NEXT: rldicl r7, r4, 32, 48 ; P8LE-NEXT: extsh r9, r6 -; P8LE-NEXT: extsw r10, r10 -; P8LE-NEXT: extsh r11, r5 -; P8LE-NEXT: mulld r12, r8, r4 -; P8LE-NEXT: extsw r9, r9 -; P8LE-NEXT: extsw r11, r11 -; P8LE-NEXT: mulld r30, r10, r4 -; P8LE-NEXT: mulld r0, r9, r4 -; P8LE-NEXT: mulld r4, r11, r4 -; P8LE-NEXT: rldicl r12, r12, 32, 32 -; P8LE-NEXT: add r8, r12, r8 -; P8LE-NEXT: rldicl r12, r30, 32, 32 -; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; P8LE-NEXT: rldicl r0, r0, 32, 32 -; P8LE-NEXT: rldicl r4, r4, 32, 32 -; P8LE-NEXT: add r10, r12, r10 -; P8LE-NEXT: add r9, r0, r9 -; P8LE-NEXT: srwi r0, r8, 31 -; P8LE-NEXT: add r4, r4, r11 -; P8LE-NEXT: srwi r11, r10, 31 +; P8LE-NEXT: mulhw r10, r8, r3 +; P8LE-NEXT: rldicl r4, r4, 16, 48 +; P8LE-NEXT: extsh r11, r7 +; P8LE-NEXT: mulhw r12, r9, r3 +; P8LE-NEXT: extsh r0, r4 +; P8LE-NEXT: mulhw r30, r11, r3 +; P8LE-NEXT: mulhw r3, r0, r3 +; P8LE-NEXT: add r8, r10, r8 +; P8LE-NEXT: add r9, r12, r9 +; P8LE-NEXT: srwi r10, r8, 31 ; P8LE-NEXT: srawi r8, r8, 6 -; P8LE-NEXT: srawi r10, r10, 6 -; P8LE-NEXT: srwi r12, r9, 31 -; P8LE-NEXT: add r8, r8, r0 +; P8LE-NEXT: add r11, r30, r11 +; P8LE-NEXT: add r3, r3, r0 +; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8LE-NEXT: add r8, r8, r10 +; P8LE-NEXT: srwi r10, r9, 31 ; P8LE-NEXT: srawi r9, r9, 6 -; P8LE-NEXT: add r10, r10, r11 -; P8LE-NEXT: srwi r11, r4, 31 -; P8LE-NEXT: srawi r4, r4, 6 -; P8LE-NEXT: add r9, r9, r12 ; P8LE-NEXT: mulli r8, r8, 95 -; P8LE-NEXT: add r4, r4, r11 +; P8LE-NEXT: add r9, r9, r10 +; P8LE-NEXT: srwi r10, r11, 31 +; P8LE-NEXT: srawi r11, r11, 6 ; P8LE-NEXT: mulli r9, r9, 95 +; P8LE-NEXT: add r10, r11, r10 +; P8LE-NEXT: srwi r11, r3, 31 +; P8LE-NEXT: srawi r3, r3, 6 ; P8LE-NEXT: mulli r10, r10, 95 -; P8LE-NEXT: mulli r4, r4, 95 -; P8LE-NEXT: subf r3, r8, r3 +; P8LE-NEXT: subf r5, r8, r5 +; P8LE-NEXT: add r3, r3, r11 +; P8LE-NEXT: mtfprd f0, r5 +; P8LE-NEXT: mulli r3, r3, 95 ; P8LE-NEXT: subf r6, r9, r6 -; P8LE-NEXT: mtfprd f0, r3 -; P8LE-NEXT: subf r3, r10, r7 -; P8LE-NEXT: subf r4, r4, r5 ; P8LE-NEXT: mtfprd f1, r6 -; P8LE-NEXT: mtfprd f2, r3 ; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: mtfprd f3, r4 +; P8LE-NEXT: subf r5, r10, r7 +; P8LE-NEXT: mtfprd f2, r5 ; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: subf r3, r3, r4 +; P8LE-NEXT: mtfprd f3, r3 ; P8LE-NEXT: xxswapd v4, vs2 -; P8LE-NEXT: xxswapd v5, vs3 ; P8LE-NEXT: vmrglh v2, v3, v2 +; P8LE-NEXT: xxswapd v5, vs3 ; P8LE-NEXT: vmrglh v3, v5, v4 ; P8LE-NEXT: vmrglw v2, v3, v2 ; P8LE-NEXT: blr @@ -491,29 +433,21 @@ ; P8BE-NEXT: extsh r5, r5 ; P8BE-NEXT: rldicl r7, r4, 32, 48 ; P8BE-NEXT: extsh r6, r6 -; P8BE-NEXT: extsw r5, r5 +; P8BE-NEXT: mulhw r8, r5, r3 ; P8BE-NEXT: rldicl r4, r4, 16, 48 ; P8BE-NEXT: extsh r7, r7 -; P8BE-NEXT: extsw r6, r6 -; P8BE-NEXT: mulld r8, r5, r3 +; P8BE-NEXT: mulhw r9, r6, r3 ; P8BE-NEXT: extsh r4, r4 -; P8BE-NEXT: extsw r7, r7 -; P8BE-NEXT: mulld r9, r6, r3 -; P8BE-NEXT: extsw r4, r4 -; P8BE-NEXT: mulld r10, r7, r3 -; P8BE-NEXT: mulld r3, r4, r3 -; P8BE-NEXT: rldicl r8, r8, 32, 32 -; P8BE-NEXT: rldicl r9, r9, 32, 32 +; P8BE-NEXT: mulhw r10, r7, r3 +; P8BE-NEXT: mulhw r3, r4, r3 ; P8BE-NEXT: add r8, r8, r5 -; P8BE-NEXT: rldicl r10, r10, 32, 32 ; P8BE-NEXT: add r9, r9, r6 ; P8BE-NEXT: srwi r11, r8, 31 ; P8BE-NEXT: srawi r8, r8, 6 -; P8BE-NEXT: rldicl r3, r3, 32, 32 ; P8BE-NEXT: add r10, r10, r7 +; P8BE-NEXT: add r3, r3, r4 ; P8BE-NEXT: add r8, r8, r11 ; P8BE-NEXT: srwi r11, r9, 31 -; P8BE-NEXT: add r3, r3, r4 ; P8BE-NEXT: srawi r9, r9, 6 ; P8BE-NEXT: mulli r8, r8, 95 ; P8BE-NEXT: add r9, r9, r11 @@ -553,12 +487,10 @@ ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r4, r3 ; P9LE-NEXT: lis r5, -21386 ; P9LE-NEXT: ori r5, r5, 37253 -; P9LE-NEXT: extsw r4, r4 -; P9LE-NEXT: mulld r6, r4, r5 -; P9LE-NEXT: rldicl r6, r6, 32, 32 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: mulhw r6, r4, r5 ; P9LE-NEXT: add r4, r6, r4 ; P9LE-NEXT: srwi r6, r4, 31 ; P9LE-NEXT: srawi r4, r4, 6 @@ -569,9 +501,7 @@ ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r6, r3 -; P9LE-NEXT: extsw r6, r6 -; P9LE-NEXT: mulld r7, r6, r5 -; P9LE-NEXT: rldicl r7, r7, 32, 32 +; P9LE-NEXT: mulhw r7, r6, r5 ; P9LE-NEXT: add r6, r7, r6 ; P9LE-NEXT: srwi r7, r6, 31 ; P9LE-NEXT: srawi r6, r6, 6 @@ -583,9 +513,7 @@ ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r7, r3 -; P9LE-NEXT: extsw r7, r7 -; P9LE-NEXT: mulld r8, r7, r5 -; P9LE-NEXT: rldicl r8, r8, 32, 32 +; P9LE-NEXT: mulhw r8, r7, r5 ; P9LE-NEXT: add r7, r8, r7 ; P9LE-NEXT: srwi r8, r7, 31 ; P9LE-NEXT: srawi r7, r7, 6 @@ -597,9 +525,7 @@ ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r8, r3 -; P9LE-NEXT: extsw r8, r8 -; P9LE-NEXT: mulld r5, r8, r5 -; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: mulhw r5, r8, r5 ; P9LE-NEXT: add r5, r5, r8 ; P9LE-NEXT: srwi r8, r5, 31 ; P9LE-NEXT: srawi r5, r5, 6 @@ -630,12 +556,10 @@ ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r4, r3 ; P9BE-NEXT: lis r5, -21386 ; P9BE-NEXT: ori r5, r5, 37253 -; P9BE-NEXT: extsw r4, r4 -; P9BE-NEXT: mulld r6, r4, r5 -; P9BE-NEXT: rldicl r6, r6, 32, 32 +; P9BE-NEXT: extsh r4, r3 +; P9BE-NEXT: mulhw r6, r4, r5 ; P9BE-NEXT: add r4, r6, r4 ; P9BE-NEXT: srwi r6, r4, 31 ; P9BE-NEXT: srawi r4, r4, 6 @@ -647,9 +571,7 @@ ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r6, r3 -; P9BE-NEXT: extsw r6, r6 -; P9BE-NEXT: mulld r7, r6, r5 -; P9BE-NEXT: rldicl r7, r7, 32, 32 +; P9BE-NEXT: mulhw r7, r6, r5 ; P9BE-NEXT: add r6, r7, r6 ; P9BE-NEXT: srwi r7, r6, 31 ; P9BE-NEXT: srawi r6, r6, 6 @@ -661,9 +583,7 @@ ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r7, r3 -; P9BE-NEXT: extsw r7, r7 -; P9BE-NEXT: mulld r8, r7, r5 -; P9BE-NEXT: rldicl r8, r8, 32, 32 +; P9BE-NEXT: mulhw r8, r7, r5 ; P9BE-NEXT: add r7, r8, r7 ; P9BE-NEXT: srwi r8, r7, 31 ; P9BE-NEXT: srawi r7, r7, 6 @@ -676,9 +596,7 @@ ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: extsw r3, r3 -; P9BE-NEXT: mulld r5, r3, r5 -; P9BE-NEXT: rldicl r5, r5, 32, 32 +; P9BE-NEXT: mulhw r5, r3, r5 ; P9BE-NEXT: add r5, r5, r3 ; P9BE-NEXT: srwi r8, r5, 31 ; P9BE-NEXT: srawi r5, r5, 6 @@ -706,66 +624,58 @@ ; P8LE-LABEL: combine_srem_sdiv: ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 -; P8LE-NEXT: lis r5, -21386 +; P8LE-NEXT: lis r4, -21386 ; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; P8LE-NEXT: ori r5, r5, 37253 -; P8LE-NEXT: mffprd r6, f0 -; P8LE-NEXT: clrldi r3, r6, 48 -; P8LE-NEXT: rldicl r4, r6, 48, 48 -; P8LE-NEXT: rldicl r7, r6, 32, 48 +; P8LE-NEXT: ori r4, r4, 37253 +; P8LE-NEXT: mffprd r5, f0 +; P8LE-NEXT: clrldi r3, r5, 48 +; P8LE-NEXT: rldicl r6, r5, 48, 48 +; P8LE-NEXT: rldicl r7, r5, 32, 48 ; P8LE-NEXT: extsh r8, r3 -; P8LE-NEXT: extsh r9, r4 -; P8LE-NEXT: rldicl r6, r6, 16, 48 +; P8LE-NEXT: extsh r9, r6 ; P8LE-NEXT: extsh r10, r7 -; P8LE-NEXT: extsw r8, r8 -; P8LE-NEXT: extsw r9, r9 -; P8LE-NEXT: extsh r11, r6 -; P8LE-NEXT: extsw r10, r10 -; P8LE-NEXT: mulld r12, r8, r5 -; P8LE-NEXT: extsw r11, r11 -; P8LE-NEXT: mulld r0, r9, r5 -; P8LE-NEXT: mulld r30, r10, r5 -; P8LE-NEXT: mulld r5, r11, r5 -; P8LE-NEXT: rldicl r12, r12, 32, 32 -; P8LE-NEXT: rldicl r0, r0, 32, 32 -; P8LE-NEXT: rldicl r30, r30, 32, 32 -; P8LE-NEXT: add r8, r12, r8 -; P8LE-NEXT: rldicl r5, r5, 32, 32 -; P8LE-NEXT: add r9, r0, r9 -; P8LE-NEXT: add r10, r30, r10 -; P8LE-NEXT: srwi r12, r8, 31 -; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8LE-NEXT: mulhw r11, r8, r4 +; P8LE-NEXT: rldicl r5, r5, 16, 48 +; P8LE-NEXT: mulhw r12, r9, r4 +; P8LE-NEXT: mulhw r0, r10, r4 +; P8LE-NEXT: extsh r30, r5 +; P8LE-NEXT: mulhw r4, r30, r4 +; P8LE-NEXT: add r8, r11, r8 +; P8LE-NEXT: add r9, r12, r9 +; P8LE-NEXT: srwi r11, r8, 31 +; P8LE-NEXT: add r10, r0, r10 ; P8LE-NEXT: srawi r8, r8, 6 -; P8LE-NEXT: srawi r0, r9, 6 +; P8LE-NEXT: srawi r12, r9, 6 ; P8LE-NEXT: srwi r9, r9, 31 -; P8LE-NEXT: add r5, r5, r11 -; P8LE-NEXT: add r8, r8, r12 -; P8LE-NEXT: srawi r12, r10, 6 +; P8LE-NEXT: add r8, r8, r11 +; P8LE-NEXT: add r4, r4, r30 +; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8LE-NEXT: srawi r11, r10, 6 ; P8LE-NEXT: srwi r10, r10, 31 -; P8LE-NEXT: add r9, r0, r9 -; P8LE-NEXT: mulli r0, r8, 95 -; P8LE-NEXT: add r10, r12, r10 +; P8LE-NEXT: add r9, r12, r9 ; P8LE-NEXT: mtfprd f0, r8 -; P8LE-NEXT: srwi r8, r5, 31 -; P8LE-NEXT: srawi r5, r5, 6 -; P8LE-NEXT: mulli r11, r9, 95 +; P8LE-NEXT: mulli r12, r8, 95 +; P8LE-NEXT: add r10, r11, r10 +; P8LE-NEXT: srwi r8, r4, 31 ; P8LE-NEXT: mtfprd f1, r9 -; P8LE-NEXT: mulli r9, r10, 95 -; P8LE-NEXT: add r5, r5, r8 +; P8LE-NEXT: srawi r4, r4, 6 +; P8LE-NEXT: mulli r11, r9, 95 ; P8LE-NEXT: xxswapd v2, vs0 ; P8LE-NEXT: mtfprd f2, r10 -; P8LE-NEXT: mtfprd f3, r5 -; P8LE-NEXT: mulli r5, r5, 95 +; P8LE-NEXT: mulli r9, r10, 95 +; P8LE-NEXT: add r4, r4, r8 ; P8LE-NEXT: xxswapd v3, vs1 -; P8LE-NEXT: subf r3, r0, r3 +; P8LE-NEXT: mtfprd f3, r4 +; P8LE-NEXT: mulli r4, r4, 95 ; P8LE-NEXT: xxswapd v1, vs2 +; P8LE-NEXT: subf r3, r12, r3 ; P8LE-NEXT: mtfprd f0, r3 -; P8LE-NEXT: subf r4, r11, r4 +; P8LE-NEXT: subf r6, r11, r6 ; P8LE-NEXT: xxswapd v6, vs3 ; P8LE-NEXT: subf r3, r9, r7 -; P8LE-NEXT: mtfprd f1, r4 +; P8LE-NEXT: mtfprd f1, r6 ; P8LE-NEXT: mtfprd f4, r3 -; P8LE-NEXT: subf r3, r5, r6 +; P8LE-NEXT: subf r3, r4, r5 ; P8LE-NEXT: mtfprd f5, r3 ; P8LE-NEXT: xxswapd v4, vs1 ; P8LE-NEXT: vmrglh v2, v3, v2 @@ -782,69 +692,61 @@ ; ; P8BE-LABEL: combine_srem_sdiv: ; P8BE: # %bb.0: -; P8BE-NEXT: mfvsrd r6, v2 -; P8BE-NEXT: lis r5, -21386 -; P8BE-NEXT: ori r5, r5, 37253 -; P8BE-NEXT: clrldi r3, r6, 48 -; P8BE-NEXT: rldicl r4, r6, 48, 48 +; P8BE-NEXT: mfvsrd r5, v2 +; P8BE-NEXT: lis r4, -21386 +; P8BE-NEXT: ori r4, r4, 37253 +; P8BE-NEXT: clrldi r3, r5, 48 +; P8BE-NEXT: rldicl r6, r5, 48, 48 ; P8BE-NEXT: extsh r8, r3 -; P8BE-NEXT: rldicl r7, r6, 32, 48 -; P8BE-NEXT: extsh r9, r4 -; P8BE-NEXT: rldicl r6, r6, 16, 48 -; P8BE-NEXT: extsw r8, r8 +; P8BE-NEXT: rldicl r7, r5, 32, 48 +; P8BE-NEXT: extsh r9, r6 +; P8BE-NEXT: rldicl r5, r5, 16, 48 +; P8BE-NEXT: mulhw r11, r8, r4 ; P8BE-NEXT: extsh r10, r7 -; P8BE-NEXT: extsw r9, r9 -; P8BE-NEXT: extsh r6, r6 -; P8BE-NEXT: mulld r11, r8, r5 -; P8BE-NEXT: extsw r10, r10 -; P8BE-NEXT: extsw r6, r6 -; P8BE-NEXT: mulld r12, r9, r5 -; P8BE-NEXT: mulld r0, r10, r5 -; P8BE-NEXT: mulld r5, r6, r5 -; P8BE-NEXT: rldicl r11, r11, 32, 32 -; P8BE-NEXT: rldicl r12, r12, 32, 32 +; P8BE-NEXT: extsh r5, r5 +; P8BE-NEXT: mulhw r12, r9, r4 +; P8BE-NEXT: mulhw r0, r10, r4 +; P8BE-NEXT: mulhw r4, r5, r4 ; P8BE-NEXT: add r8, r11, r8 -; P8BE-NEXT: rldicl r0, r0, 32, 32 -; P8BE-NEXT: rldicl r5, r5, 32, 32 ; P8BE-NEXT: add r9, r12, r9 ; P8BE-NEXT: srawi r11, r8, 6 ; P8BE-NEXT: srwi r8, r8, 31 ; P8BE-NEXT: add r10, r0, r10 -; P8BE-NEXT: add r5, r5, r6 +; P8BE-NEXT: add r4, r4, r5 +; P8BE-NEXT: add r8, r11, r8 ; P8BE-NEXT: srawi r12, r9, 6 ; P8BE-NEXT: srwi r9, r9, 31 -; P8BE-NEXT: add r8, r11, r8 ; P8BE-NEXT: srawi r0, r10, 6 -; P8BE-NEXT: srawi r11, r5, 6 +; P8BE-NEXT: srawi r11, r4, 6 ; P8BE-NEXT: srwi r10, r10, 31 ; P8BE-NEXT: add r9, r12, r9 -; P8BE-NEXT: srwi r5, r5, 31 +; P8BE-NEXT: srwi r4, r4, 31 ; P8BE-NEXT: mulli r12, r8, 95 ; P8BE-NEXT: add r10, r0, r10 -; P8BE-NEXT: add r5, r11, r5 +; P8BE-NEXT: add r4, r11, r4 ; P8BE-NEXT: mulli r0, r9, 95 ; P8BE-NEXT: sldi r9, r9, 48 ; P8BE-NEXT: sldi r8, r8, 48 ; P8BE-NEXT: mtvsrd v3, r9 -; P8BE-NEXT: mulli r9, r5, 95 +; P8BE-NEXT: mulli r9, r4, 95 ; P8BE-NEXT: mtvsrd v2, r8 ; P8BE-NEXT: mulli r8, r10, 95 ; P8BE-NEXT: sldi r10, r10, 48 ; P8BE-NEXT: subf r3, r12, r3 ; P8BE-NEXT: mtvsrd v4, r10 -; P8BE-NEXT: subf r4, r0, r4 +; P8BE-NEXT: subf r6, r0, r6 ; P8BE-NEXT: sldi r3, r3, 48 ; P8BE-NEXT: vmrghh v2, v3, v2 -; P8BE-NEXT: sldi r4, r4, 48 +; P8BE-NEXT: sldi r6, r6, 48 ; P8BE-NEXT: mtvsrd v3, r3 -; P8BE-NEXT: subf r3, r9, r6 +; P8BE-NEXT: subf r3, r9, r5 ; P8BE-NEXT: subf r7, r8, r7 -; P8BE-NEXT: mtvsrd v5, r4 +; P8BE-NEXT: mtvsrd v5, r6 ; P8BE-NEXT: sldi r3, r3, 48 -; P8BE-NEXT: sldi r6, r7, 48 +; P8BE-NEXT: sldi r5, r7, 48 ; P8BE-NEXT: mtvsrd v1, r3 -; P8BE-NEXT: sldi r3, r5, 48 -; P8BE-NEXT: mtvsrd v0, r6 +; P8BE-NEXT: sldi r3, r4, 48 +; P8BE-NEXT: mtvsrd v0, r5 ; P8BE-NEXT: vmrghh v3, v5, v3 ; P8BE-NEXT: mtvsrd v5, r3 ; P8BE-NEXT: vmrghh v0, v1, v0 @@ -882,14 +784,11 @@ ; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r4, r3 ; P9LE-NEXT: lis r5, -21386 ; P9LE-NEXT: ori r5, r5, 37253 ; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: extsw r4, r4 -; P9LE-NEXT: mulld r5, r4, r5 -; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: mulhw r5, r4, r5 ; P9LE-NEXT: add r4, r5, r4 ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 6 @@ -904,6 +803,7 @@ ; P9LE-NEXT: addze r4, r4 ; P9LE-NEXT: slwi r4, r4, 3 ; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: vmrglh v3, v4, v3 ; P9LE-NEXT: xxswapd v4, vs0 ; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: xxswapd v2, vs0 @@ -935,10 +835,8 @@ ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: extsw r3, r3 ; P9BE-NEXT: ori r4, r4, 37253 -; P9BE-NEXT: mulld r4, r3, r4 -; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: add r4, r4, r3 ; P9BE-NEXT: srwi r5, r4, 31 ; P9BE-NEXT: srawi r4, r4, 6 @@ -971,30 +869,28 @@ ; P8LE-NEXT: clrldi r7, r4, 48 ; P8LE-NEXT: extsh r6, r5 ; P8LE-NEXT: extsh r8, r7 -; P8LE-NEXT: extsw r6, r6 +; P8LE-NEXT: mulhw r3, r6, r3 ; P8LE-NEXT: rldicl r9, r4, 48, 48 -; P8LE-NEXT: mulld r3, r6, r3 ; P8LE-NEXT: srawi r8, r8, 6 ; P8LE-NEXT: extsh r10, r9 ; P8LE-NEXT: addze r8, r8 ; P8LE-NEXT: rldicl r4, r4, 32, 48 ; P8LE-NEXT: srawi r10, r10, 5 ; P8LE-NEXT: slwi r8, r8, 6 -; P8LE-NEXT: subf r7, r8, r7 -; P8LE-NEXT: rldicl r3, r3, 32, 32 -; P8LE-NEXT: mtfprd f0, r7 ; P8LE-NEXT: add r3, r3, r6 ; P8LE-NEXT: addze r6, r10 +; P8LE-NEXT: subf r7, r8, r7 ; P8LE-NEXT: srwi r10, r3, 31 ; P8LE-NEXT: srawi r3, r3, 6 +; P8LE-NEXT: mtfprd f0, r7 ; P8LE-NEXT: slwi r6, r6, 5 -; P8LE-NEXT: xxswapd v2, vs0 ; P8LE-NEXT: add r3, r3, r10 ; P8LE-NEXT: extsh r10, r4 ; P8LE-NEXT: subf r6, r6, r9 ; P8LE-NEXT: mulli r3, r3, 95 ; P8LE-NEXT: srawi r8, r10, 3 ; P8LE-NEXT: mtfprd f1, r6 +; P8LE-NEXT: xxswapd v2, vs0 ; P8LE-NEXT: addze r7, r8 ; P8LE-NEXT: xxswapd v3, vs1 ; P8LE-NEXT: subf r3, r3, r5 @@ -1018,9 +914,8 @@ ; P8BE-NEXT: rldicl r6, r4, 32, 48 ; P8BE-NEXT: extsh r5, r5 ; P8BE-NEXT: extsh r6, r6 -; P8BE-NEXT: extsw r5, r5 +; P8BE-NEXT: mulhw r3, r5, r3 ; P8BE-NEXT: rldicl r7, r4, 16, 48 -; P8BE-NEXT: mulld r3, r5, r3 ; P8BE-NEXT: srawi r8, r6, 5 ; P8BE-NEXT: extsh r7, r7 ; P8BE-NEXT: addze r8, r8 @@ -1028,16 +923,15 @@ ; P8BE-NEXT: srawi r9, r7, 6 ; P8BE-NEXT: extsh r4, r4 ; P8BE-NEXT: slwi r8, r8, 5 +; P8BE-NEXT: add r3, r3, r5 ; P8BE-NEXT: addze r9, r9 ; P8BE-NEXT: subf r6, r8, r6 -; P8BE-NEXT: rldicl r3, r3, 32, 32 -; P8BE-NEXT: slwi r8, r9, 6 -; P8BE-NEXT: add r3, r3, r5 -; P8BE-NEXT: subf r7, r8, r7 ; P8BE-NEXT: srwi r10, r3, 31 ; P8BE-NEXT: srawi r3, r3, 6 +; P8BE-NEXT: slwi r8, r9, 6 ; P8BE-NEXT: add r3, r3, r10 ; P8BE-NEXT: srawi r9, r4, 3 +; P8BE-NEXT: subf r7, r8, r7 ; P8BE-NEXT: mulli r3, r3, 95 ; P8BE-NEXT: sldi r6, r6, 48 ; P8BE-NEXT: addze r8, r9 @@ -1065,13 +959,10 @@ ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r4, r3 ; P9LE-NEXT: lis r5, -14230 ; P9LE-NEXT: ori r5, r5, 30865 -; P9LE-NEXT: extsw r4, r4 -; P9LE-NEXT: mulld r5, r4, r5 -; P9LE-NEXT: rldicl r5, r5, 32, 32 -; P9LE-NEXT: xxlxor v4, v4, v4 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: mulhw r5, r4, r5 ; P9LE-NEXT: add r4, r5, r4 ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 9 @@ -1081,12 +972,11 @@ ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 4 +; P9LE-NEXT: ori r5, r5, 17097 +; P9LE-NEXT: xxlxor v3, v3, v3 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: extsw r4, r4 -; P9LE-NEXT: ori r5, r5, 17097 -; P9LE-NEXT: mulld r5, r4, r5 -; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: mulhw r5, r4, r5 ; P9LE-NEXT: add r4, r5, r4 ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 4 @@ -1094,21 +984,19 @@ ; P9LE-NEXT: lis r5, 24749 ; P9LE-NEXT: mulli r4, r4, 23 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: xxswapd v3, vs0 +; P9LE-NEXT: xxswapd v4, vs0 ; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: extsw r4, r4 ; P9LE-NEXT: ori r5, r5, 47143 -; P9LE-NEXT: mulld r4, r4, r5 -; P9LE-NEXT: rldicl r5, r4, 1, 63 -; P9LE-NEXT: rldicl r4, r4, 32, 32 +; P9LE-NEXT: mulhw r4, r4, r5 +; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 11 ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 5423 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: vmrglh v3, v3, v4 +; P9LE-NEXT: vmrglh v3, v4, v3 ; P9LE-NEXT: xxswapd v4, vs0 ; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: xxswapd v2, vs0 @@ -1120,12 +1008,10 @@ ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 ; P9BE-NEXT: lis r4, -19946 ; P9BE-NEXT: ori r4, r4, 17097 -; P9BE-NEXT: extsw r3, r3 -; P9BE-NEXT: mulld r4, r3, r4 -; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: add r4, r4, r3 ; P9BE-NEXT: srwi r5, r4, 31 ; P9BE-NEXT: srawi r4, r4, 4 @@ -1138,11 +1024,9 @@ ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: extsw r3, r3 ; P9BE-NEXT: ori r4, r4, 47143 -; P9BE-NEXT: mulld r4, r3, r4 -; P9BE-NEXT: rldicl r5, r4, 1, 63 -; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: mulhw r4, r3, r4 +; P9BE-NEXT: srwi r5, r4, 31 ; P9BE-NEXT: srawi r4, r4, 11 ; P9BE-NEXT: add r4, r4, r5 ; P9BE-NEXT: mulli r4, r4, 5423 @@ -1153,10 +1037,8 @@ ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: extsw r3, r3 ; P9BE-NEXT: ori r4, r4, 30865 -; P9BE-NEXT: mulld r4, r3, r4 -; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: add r4, r4, r3 ; P9BE-NEXT: srwi r5, r4, 31 ; P9BE-NEXT: srawi r4, r4, 9 @@ -1177,46 +1059,40 @@ ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 ; P8LE-NEXT: lis r3, 24749 -; P8LE-NEXT: lis r8, -19946 -; P8LE-NEXT: lis r10, -14230 +; P8LE-NEXT: lis r7, -19946 +; P8LE-NEXT: lis r9, -14230 ; P8LE-NEXT: xxlxor v5, v5, v5 ; P8LE-NEXT: ori r3, r3, 47143 -; P8LE-NEXT: ori r8, r8, 17097 +; P8LE-NEXT: ori r7, r7, 17097 ; P8LE-NEXT: mffprd r4, f0 ; P8LE-NEXT: rldicl r5, r4, 16, 48 ; P8LE-NEXT: rldicl r6, r4, 32, 48 ; P8LE-NEXT: rldicl r4, r4, 48, 48 -; P8LE-NEXT: extsh r7, r5 -; P8LE-NEXT: extsh r9, r6 -; P8LE-NEXT: extsw r7, r7 -; P8LE-NEXT: extsh r11, r4 -; P8LE-NEXT: extsw r9, r9 -; P8LE-NEXT: mulld r3, r7, r3 -; P8LE-NEXT: ori r7, r10, 30865 -; P8LE-NEXT: extsw r10, r11 -; P8LE-NEXT: mulld r8, r9, r8 -; P8LE-NEXT: mulld r7, r10, r7 -; P8LE-NEXT: rldicl r11, r3, 1, 63 -; P8LE-NEXT: rldicl r3, r3, 32, 32 -; P8LE-NEXT: rldicl r8, r8, 32, 32 -; P8LE-NEXT: rldicl r7, r7, 32, 32 -; P8LE-NEXT: add r8, r8, r9 -; P8LE-NEXT: srawi r3, r3, 11 +; P8LE-NEXT: extsh r8, r5 +; P8LE-NEXT: extsh r10, r6 +; P8LE-NEXT: mulhw r3, r8, r3 +; P8LE-NEXT: ori r8, r9, 30865 +; P8LE-NEXT: extsh r9, r4 +; P8LE-NEXT: mulhw r7, r10, r7 +; P8LE-NEXT: mulhw r8, r9, r8 ; P8LE-NEXT: add r7, r7, r10 -; P8LE-NEXT: srwi r9, r8, 31 -; P8LE-NEXT: srawi r8, r8, 4 -; P8LE-NEXT: add r3, r3, r11 +; P8LE-NEXT: srwi r10, r3, 31 ; P8LE-NEXT: add r8, r8, r9 +; P8LE-NEXT: srawi r3, r3, 11 ; P8LE-NEXT: srwi r9, r7, 31 -; P8LE-NEXT: srawi r7, r7, 9 -; P8LE-NEXT: mulli r3, r3, 5423 +; P8LE-NEXT: srawi r7, r7, 4 +; P8LE-NEXT: add r3, r3, r10 ; P8LE-NEXT: add r7, r7, r9 -; P8LE-NEXT: mulli r8, r8, 23 -; P8LE-NEXT: mulli r7, r7, 654 +; P8LE-NEXT: srwi r9, r8, 31 +; P8LE-NEXT: srawi r8, r8, 9 +; P8LE-NEXT: mulli r3, r3, 5423 +; P8LE-NEXT: add r8, r8, r9 +; P8LE-NEXT: mulli r7, r7, 23 +; P8LE-NEXT: mulli r8, r8, 654 ; P8LE-NEXT: subf r3, r3, r5 ; P8LE-NEXT: mtfprd f0, r3 -; P8LE-NEXT: subf r3, r8, r6 -; P8LE-NEXT: subf r4, r7, r4 +; P8LE-NEXT: subf r3, r7, r6 +; P8LE-NEXT: subf r4, r8, r4 ; P8LE-NEXT: mtfprd f1, r3 ; P8LE-NEXT: mtfprd f2, r4 ; P8LE-NEXT: xxswapd v2, vs0 @@ -1229,54 +1105,48 @@ ; ; P8BE-LABEL: dont_fold_srem_one: ; P8BE: # %bb.0: -; P8BE-NEXT: mfvsrd r4, v2 -; P8BE-NEXT: lis r3, 24749 -; P8BE-NEXT: lis r7, -19946 +; P8BE-NEXT: mfvsrd r3, v2 +; P8BE-NEXT: lis r5, 24749 +; P8BE-NEXT: lis r6, -19946 ; P8BE-NEXT: lis r8, -14230 -; P8BE-NEXT: ori r3, r3, 47143 -; P8BE-NEXT: ori r7, r7, 17097 +; P8BE-NEXT: ori r5, r5, 47143 +; P8BE-NEXT: ori r6, r6, 17097 ; P8BE-NEXT: ori r8, r8, 30865 -; P8BE-NEXT: clrldi r5, r4, 48 -; P8BE-NEXT: rldicl r6, r4, 48, 48 -; P8BE-NEXT: rldicl r4, r4, 32, 48 -; P8BE-NEXT: extsh r5, r5 -; P8BE-NEXT: extsh r6, r6 +; P8BE-NEXT: clrldi r4, r3, 48 +; P8BE-NEXT: rldicl r7, r3, 48, 48 +; P8BE-NEXT: rldicl r3, r3, 32, 48 ; P8BE-NEXT: extsh r4, r4 -; P8BE-NEXT: extsw r5, r5 -; P8BE-NEXT: extsw r6, r6 -; P8BE-NEXT: extsw r4, r4 -; P8BE-NEXT: mulld r3, r5, r3 -; P8BE-NEXT: mulld r7, r6, r7 -; P8BE-NEXT: mulld r8, r4, r8 -; P8BE-NEXT: rldicl r9, r3, 1, 63 -; P8BE-NEXT: rldicl r3, r3, 32, 32 -; P8BE-NEXT: rldicl r7, r7, 32, 32 -; P8BE-NEXT: rldicl r8, r8, 32, 32 -; P8BE-NEXT: srawi r3, r3, 11 -; P8BE-NEXT: add r7, r7, r6 -; P8BE-NEXT: add r8, r8, r4 -; P8BE-NEXT: add r3, r3, r9 -; P8BE-NEXT: srwi r9, r7, 31 -; P8BE-NEXT: srawi r7, r7, 4 -; P8BE-NEXT: mulli r3, r3, 5423 -; P8BE-NEXT: add r7, r7, r9 +; P8BE-NEXT: extsh r7, r7 +; P8BE-NEXT: extsh r3, r3 +; P8BE-NEXT: mulhw r5, r4, r5 +; P8BE-NEXT: mulhw r6, r7, r6 +; P8BE-NEXT: mulhw r8, r3, r8 +; P8BE-NEXT: srwi r9, r5, 31 +; P8BE-NEXT: srawi r5, r5, 11 +; P8BE-NEXT: add r6, r6, r7 +; P8BE-NEXT: add r8, r8, r3 +; P8BE-NEXT: add r5, r5, r9 +; P8BE-NEXT: srwi r9, r6, 31 +; P8BE-NEXT: srawi r6, r6, 4 +; P8BE-NEXT: add r6, r6, r9 ; P8BE-NEXT: srwi r9, r8, 31 ; P8BE-NEXT: srawi r8, r8, 9 -; P8BE-NEXT: mulli r7, r7, 23 +; P8BE-NEXT: mulli r5, r5, 5423 ; P8BE-NEXT: add r8, r8, r9 +; P8BE-NEXT: mulli r6, r6, 23 ; P8BE-NEXT: li r9, 0 ; P8BE-NEXT: mulli r8, r8, 654 -; P8BE-NEXT: subf r3, r3, r5 +; P8BE-NEXT: subf r4, r5, r4 ; P8BE-NEXT: sldi r5, r9, 48 -; P8BE-NEXT: sldi r3, r3, 48 ; P8BE-NEXT: mtvsrd v2, r5 -; P8BE-NEXT: subf r5, r7, r6 -; P8BE-NEXT: mtvsrd v3, r3 -; P8BE-NEXT: sldi r3, r5, 48 -; P8BE-NEXT: subf r4, r8, r4 -; P8BE-NEXT: mtvsrd v4, r3 +; P8BE-NEXT: subf r5, r6, r7 ; P8BE-NEXT: sldi r4, r4, 48 -; P8BE-NEXT: mtvsrd v5, r4 +; P8BE-NEXT: subf r3, r8, r3 +; P8BE-NEXT: mtvsrd v3, r4 +; P8BE-NEXT: sldi r4, r5, 48 +; P8BE-NEXT: sldi r3, r3, 48 +; P8BE-NEXT: mtvsrd v4, r4 +; P8BE-NEXT: mtvsrd v5, r3 ; P8BE-NEXT: vmrghh v3, v4, v3 ; P8BE-NEXT: vmrghh v2, v2, v5 ; P8BE-NEXT: vmrghw v2, v2, v3 @@ -1291,12 +1161,10 @@ ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r4, r3 ; P9LE-NEXT: lis r5, -19946 ; P9LE-NEXT: ori r5, r5, 17097 -; P9LE-NEXT: extsw r4, r4 -; P9LE-NEXT: mulld r5, r4, r5 -; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: extsh r4, r3 +; P9LE-NEXT: mulhw r5, r4, r5 ; P9LE-NEXT: add r4, r5, r4 ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 4 @@ -1308,11 +1176,9 @@ ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 -; P9LE-NEXT: extsw r4, r4 ; P9LE-NEXT: ori r5, r5, 47143 -; P9LE-NEXT: mulld r4, r4, r5 -; P9LE-NEXT: rldicl r5, r4, 1, 63 -; P9LE-NEXT: rldicl r4, r4, 32, 32 +; P9LE-NEXT: mulhw r4, r4, r5 +; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 11 ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 5423 @@ -1339,12 +1205,10 @@ ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 ; P9BE-NEXT: lis r4, -19946 ; P9BE-NEXT: ori r4, r4, 17097 -; P9BE-NEXT: extsw r3, r3 -; P9BE-NEXT: mulld r4, r3, r4 -; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: extsh r3, r3 +; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: add r4, r4, r3 ; P9BE-NEXT: srwi r5, r4, 31 ; P9BE-NEXT: srawi r4, r4, 4 @@ -1357,11 +1221,9 @@ ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: extsw r3, r3 ; P9BE-NEXT: ori r4, r4, 47143 -; P9BE-NEXT: mulld r4, r3, r4 -; P9BE-NEXT: rldicl r5, r4, 1, 63 -; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: mulhw r4, r3, r4 +; P9BE-NEXT: srwi r5, r4, 31 ; P9BE-NEXT: srawi r4, r4, 11 ; P9BE-NEXT: add r4, r4, r5 ; P9BE-NEXT: mulli r4, r4, 5423 @@ -1388,39 +1250,35 @@ ; P8LE-LABEL: dont_fold_urem_i16_smax: ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 -; P8LE-NEXT: lis r6, 24749 -; P8LE-NEXT: lis r7, -19946 +; P8LE-NEXT: lis r4, 24749 +; P8LE-NEXT: lis r5, -19946 ; P8LE-NEXT: xxlxor v5, v5, v5 -; P8LE-NEXT: ori r6, r6, 47143 -; P8LE-NEXT: ori r7, r7, 17097 +; P8LE-NEXT: ori r4, r4, 47143 +; P8LE-NEXT: ori r5, r5, 17097 ; P8LE-NEXT: mffprd r3, f0 -; P8LE-NEXT: rldicl r4, r3, 16, 48 -; P8LE-NEXT: rldicl r5, r3, 32, 48 -; P8LE-NEXT: extsh r8, r4 -; P8LE-NEXT: extsh r9, r5 -; P8LE-NEXT: extsw r8, r8 -; P8LE-NEXT: extsw r9, r9 -; P8LE-NEXT: mulld r6, r8, r6 -; P8LE-NEXT: mulld r7, r9, r7 +; P8LE-NEXT: rldicl r6, r3, 16, 48 +; P8LE-NEXT: rldicl r7, r3, 32, 48 +; P8LE-NEXT: extsh r8, r6 +; P8LE-NEXT: extsh r9, r7 +; P8LE-NEXT: mulhw r4, r8, r4 +; P8LE-NEXT: mulhw r5, r9, r5 ; P8LE-NEXT: rldicl r3, r3, 48, 48 -; P8LE-NEXT: rldicl r8, r6, 32, 32 -; P8LE-NEXT: rldicl r7, r7, 32, 32 -; P8LE-NEXT: rldicl r6, r6, 1, 63 -; P8LE-NEXT: srawi r8, r8, 11 -; P8LE-NEXT: add r7, r7, r9 -; P8LE-NEXT: add r6, r8, r6 -; P8LE-NEXT: srwi r8, r7, 31 -; P8LE-NEXT: srawi r7, r7, 4 -; P8LE-NEXT: mulli r6, r6, 5423 -; P8LE-NEXT: add r7, r7, r8 +; P8LE-NEXT: srwi r8, r4, 31 +; P8LE-NEXT: srawi r4, r4, 11 +; P8LE-NEXT: add r5, r5, r9 +; P8LE-NEXT: add r4, r4, r8 +; P8LE-NEXT: srwi r8, r5, 31 +; P8LE-NEXT: srawi r5, r5, 4 +; P8LE-NEXT: mulli r4, r4, 5423 +; P8LE-NEXT: add r5, r5, r8 ; P8LE-NEXT: extsh r8, r3 -; P8LE-NEXT: mulli r7, r7, 23 +; P8LE-NEXT: mulli r5, r5, 23 ; P8LE-NEXT: srawi r8, r8, 15 -; P8LE-NEXT: subf r4, r6, r4 +; P8LE-NEXT: subf r4, r4, r6 ; P8LE-NEXT: addze r6, r8 ; P8LE-NEXT: mtfprd f0, r4 ; P8LE-NEXT: slwi r4, r6, 15 -; P8LE-NEXT: subf r5, r7, r5 +; P8LE-NEXT: subf r5, r5, r7 ; P8LE-NEXT: subf r3, r4, r3 ; P8LE-NEXT: mtfprd f1, r5 ; P8LE-NEXT: xxswapd v2, vs0 @@ -1434,47 +1292,43 @@ ; ; P8BE-LABEL: dont_fold_urem_i16_smax: ; P8BE: # %bb.0: -; P8BE-NEXT: mfvsrd r4, v2 -; P8BE-NEXT: lis r3, 24749 -; P8BE-NEXT: lis r7, -19946 -; P8BE-NEXT: ori r3, r3, 47143 -; P8BE-NEXT: ori r7, r7, 17097 -; P8BE-NEXT: clrldi r5, r4, 48 -; P8BE-NEXT: rldicl r6, r4, 48, 48 -; P8BE-NEXT: extsh r5, r5 +; P8BE-NEXT: mfvsrd r3, v2 +; P8BE-NEXT: lis r4, 24749 +; P8BE-NEXT: lis r5, -19946 +; P8BE-NEXT: ori r4, r4, 47143 +; P8BE-NEXT: ori r5, r5, 17097 +; P8BE-NEXT: clrldi r6, r3, 48 +; P8BE-NEXT: rldicl r7, r3, 48, 48 ; P8BE-NEXT: extsh r6, r6 -; P8BE-NEXT: extsw r5, r5 -; P8BE-NEXT: extsw r6, r6 -; P8BE-NEXT: mulld r3, r5, r3 -; P8BE-NEXT: mulld r7, r6, r7 -; P8BE-NEXT: rldicl r4, r4, 32, 48 -; P8BE-NEXT: extsh r4, r4 -; P8BE-NEXT: rldicl r8, r3, 1, 63 -; P8BE-NEXT: rldicl r3, r3, 32, 32 -; P8BE-NEXT: rldicl r7, r7, 32, 32 -; P8BE-NEXT: srawi r3, r3, 11 -; P8BE-NEXT: add r7, r7, r6 -; P8BE-NEXT: add r3, r3, r8 -; P8BE-NEXT: srwi r8, r7, 31 -; P8BE-NEXT: srawi r7, r7, 4 -; P8BE-NEXT: mulli r3, r3, 5423 -; P8BE-NEXT: add r7, r7, r8 +; P8BE-NEXT: extsh r7, r7 +; P8BE-NEXT: mulhw r4, r6, r4 +; P8BE-NEXT: mulhw r5, r7, r5 +; P8BE-NEXT: rldicl r3, r3, 32, 48 +; P8BE-NEXT: extsh r3, r3 +; P8BE-NEXT: srwi r8, r4, 31 +; P8BE-NEXT: srawi r4, r4, 11 +; P8BE-NEXT: add r5, r5, r7 +; P8BE-NEXT: add r4, r4, r8 +; P8BE-NEXT: srwi r8, r5, 31 +; P8BE-NEXT: srawi r5, r5, 4 +; P8BE-NEXT: mulli r4, r4, 5423 +; P8BE-NEXT: add r5, r5, r8 ; P8BE-NEXT: li r8, 0 -; P8BE-NEXT: mulli r7, r7, 23 -; P8BE-NEXT: srawi r9, r4, 15 -; P8BE-NEXT: subf r3, r3, r5 -; P8BE-NEXT: sldi r5, r8, 48 +; P8BE-NEXT: mulli r5, r5, 23 +; P8BE-NEXT: srawi r9, r3, 15 +; P8BE-NEXT: subf r4, r4, r6 +; P8BE-NEXT: sldi r6, r8, 48 ; P8BE-NEXT: addze r8, r9 -; P8BE-NEXT: mtvsrd v2, r5 -; P8BE-NEXT: subf r5, r7, r6 +; P8BE-NEXT: mtvsrd v2, r6 ; P8BE-NEXT: slwi r6, r8, 15 -; P8BE-NEXT: sldi r3, r3, 48 -; P8BE-NEXT: subf r4, r6, r4 -; P8BE-NEXT: mtvsrd v3, r3 -; P8BE-NEXT: sldi r3, r5, 48 ; P8BE-NEXT: sldi r4, r4, 48 -; P8BE-NEXT: mtvsrd v4, r3 -; P8BE-NEXT: mtvsrd v5, r4 +; P8BE-NEXT: subf r5, r5, r7 +; P8BE-NEXT: subf r3, r6, r3 +; P8BE-NEXT: mtvsrd v3, r4 +; P8BE-NEXT: sldi r4, r5, 48 +; P8BE-NEXT: sldi r3, r3, 48 +; P8BE-NEXT: mtvsrd v4, r4 +; P8BE-NEXT: mtvsrd v5, r3 ; P8BE-NEXT: vmrghh v3, v4, v3 ; P8BE-NEXT: vmrghh v2, v2, v5 ; P8BE-NEXT: vmrghw v2, v2, v3 diff --git a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll --- a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll @@ -15,21 +15,21 @@ ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: lis r5, 21399 ; P9LE-NEXT: ori r5, r5, 33437 -; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 -; P9LE-NEXT: mulld r4, r4, r5 +; P9LE-NEXT: clrlwi r4, r3, 16 +; P9LE-NEXT: mulhwu r4, r4, r5 ; P9LE-NEXT: lis r5, 16727 ; P9LE-NEXT: ori r5, r5, 2287 -; P9LE-NEXT: rldicl r4, r4, 27, 37 +; P9LE-NEXT: srwi r4, r4, 5 ; P9LE-NEXT: mulli r4, r4, 98 ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 -; P9LE-NEXT: mulld r4, r4, r5 +; P9LE-NEXT: clrlwi r4, r3, 16 +; P9LE-NEXT: mulhwu r4, r4, r5 ; P9LE-NEXT: lis r5, 8456 ; P9LE-NEXT: ori r5, r5, 16913 -; P9LE-NEXT: rldicl r4, r4, 24, 40 +; P9LE-NEXT: srwi r4, r4, 8 ; P9LE-NEXT: mulli r4, r4, 1003 ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: xxswapd v3, vs0 @@ -37,8 +37,10 @@ ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: rlwinm r4, r3, 30, 18, 31 -; P9LE-NEXT: mulld r4, r4, r5 -; P9LE-NEXT: rldicl r4, r4, 30, 34 +; P9LE-NEXT: mulhwu r4, r4, r5 +; P9LE-NEXT: lis r5, 22765 +; P9LE-NEXT: ori r5, r5, 8969 +; P9LE-NEXT: srwi r4, r4, 2 ; P9LE-NEXT: mulli r4, r4, 124 ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: xxswapd v4, vs0 @@ -46,19 +48,15 @@ ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: clrlwi r4, r3, 16 -; P9LE-NEXT: lis r6, 22765 -; P9LE-NEXT: ori r6, r6, 8969 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: clrldi r5, r4, 32 -; P9LE-NEXT: mulld r5, r5, r6 -; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: mulhwu r5, r4, r5 ; P9LE-NEXT: subf r4, r5, r4 ; P9LE-NEXT: srwi r4, r4, 1 ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 +; P9LE-NEXT: vmrglh v3, v4, v3 +; P9LE-NEXT: xxswapd v4, vs0 ; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: xxswapd v2, vs0 ; P9LE-NEXT: vmrglh v2, v4, v2 @@ -69,49 +67,45 @@ ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: lis r4, 16727 +; P9BE-NEXT: ori r4, r4, 2287 ; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: lis r5, 16727 -; P9BE-NEXT: ori r5, r5, 2287 -; P9BE-NEXT: clrldi r4, r3, 32 -; P9BE-NEXT: mulld r4, r4, r5 -; P9BE-NEXT: lis r5, 21399 -; P9BE-NEXT: ori r5, r5, 33437 -; P9BE-NEXT: rldicl r4, r4, 24, 40 +; P9BE-NEXT: mulhwu r4, r3, r4 +; P9BE-NEXT: srwi r4, r4, 8 ; P9BE-NEXT: mulli r4, r4, 1003 ; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: lis r4, 21399 ; P9BE-NEXT: sldi r3, r3, 48 ; P9BE-NEXT: mtvsrd v3, r3 ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: clrldi r4, r3, 32 -; P9BE-NEXT: mulld r4, r4, r5 -; P9BE-NEXT: lis r5, 8456 -; P9BE-NEXT: ori r5, r5, 16913 -; P9BE-NEXT: rldicl r4, r4, 27, 37 +; P9BE-NEXT: ori r4, r4, 33437 +; P9BE-NEXT: mulhwu r4, r3, r4 +; P9BE-NEXT: srwi r4, r4, 5 ; P9BE-NEXT: mulli r4, r4, 98 ; P9BE-NEXT: subf r3, r4, r3 ; P9BE-NEXT: sldi r3, r3, 48 ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: lis r5, 8456 +; P9BE-NEXT: ori r5, r5, 16913 +; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: clrlwi r4, r3, 16 ; P9BE-NEXT: rlwinm r3, r3, 30, 18, 31 -; P9BE-NEXT: mulld r3, r3, r5 -; P9BE-NEXT: lis r5, 22765 -; P9BE-NEXT: ori r5, r5, 8969 -; P9BE-NEXT: rldicl r3, r3, 30, 34 +; P9BE-NEXT: mulhwu r3, r3, r5 +; P9BE-NEXT: srwi r3, r3, 2 ; P9BE-NEXT: mulli r3, r3, 124 ; P9BE-NEXT: subf r3, r3, r4 +; P9BE-NEXT: lis r4, 22765 ; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: clrldi r4, r3, 32 -; P9BE-NEXT: mulld r4, r4, r5 -; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: ori r4, r4, 8969 +; P9BE-NEXT: mulhwu r4, r3, r4 ; P9BE-NEXT: subf r5, r4, r3 ; P9BE-NEXT: srwi r5, r5, 1 ; P9BE-NEXT: add r4, r5, r4 @@ -128,45 +122,43 @@ ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 ; P8LE-NEXT: lis r3, 22765 -; P8LE-NEXT: lis r8, 21399 +; P8LE-NEXT: lis r7, 21399 +; P8LE-NEXT: lis r10, 16727 ; P8LE-NEXT: ori r3, r3, 8969 -; P8LE-NEXT: ori r8, r8, 33437 +; P8LE-NEXT: ori r7, r7, 33437 +; P8LE-NEXT: ori r10, r10, 2287 ; P8LE-NEXT: mffprd r4, f0 -; P8LE-NEXT: clrldi r5, r4, 48 -; P8LE-NEXT: rldicl r9, r4, 32, 48 -; P8LE-NEXT: clrlwi r6, r5, 16 -; P8LE-NEXT: rldicl r10, r4, 16, 48 -; P8LE-NEXT: rlwinm r11, r9, 0, 16, 31 -; P8LE-NEXT: clrldi r7, r6, 32 -; P8LE-NEXT: rlwinm r12, r10, 0, 16, 31 -; P8LE-NEXT: mulld r3, r7, r3 -; P8LE-NEXT: lis r7, 16727 -; P8LE-NEXT: ori r7, r7, 2287 -; P8LE-NEXT: mulld r8, r11, r8 +; P8LE-NEXT: clrldi r6, r4, 48 +; P8LE-NEXT: rldicl r5, r4, 32, 48 +; P8LE-NEXT: clrlwi r9, r6, 16 +; P8LE-NEXT: rldicl r8, r4, 16, 48 +; P8LE-NEXT: clrlwi r11, r5, 16 +; P8LE-NEXT: mulhwu r3, r9, r3 +; P8LE-NEXT: clrlwi r12, r8, 16 +; P8LE-NEXT: mulhwu r7, r11, r7 ; P8LE-NEXT: lis r11, 8456 ; P8LE-NEXT: rldicl r4, r4, 48, 48 -; P8LE-NEXT: mulld r7, r12, r7 +; P8LE-NEXT: mulhwu r10, r12, r10 ; P8LE-NEXT: ori r11, r11, 16913 ; P8LE-NEXT: rlwinm r12, r4, 30, 18, 31 -; P8LE-NEXT: rldicl r3, r3, 32, 32 -; P8LE-NEXT: mulld r11, r12, r11 -; P8LE-NEXT: subf r6, r3, r6 -; P8LE-NEXT: rldicl r8, r8, 27, 37 -; P8LE-NEXT: srwi r6, r6, 1 -; P8LE-NEXT: add r3, r6, r3 -; P8LE-NEXT: rldicl r6, r7, 24, 40 -; P8LE-NEXT: mulli r7, r8, 98 +; P8LE-NEXT: mulhwu r11, r12, r11 +; P8LE-NEXT: subf r9, r3, r9 +; P8LE-NEXT: srwi r9, r9, 1 +; P8LE-NEXT: srwi r7, r7, 5 +; P8LE-NEXT: add r3, r9, r3 +; P8LE-NEXT: srwi r9, r10, 8 ; P8LE-NEXT: srwi r3, r3, 6 -; P8LE-NEXT: rldicl r8, r11, 30, 34 -; P8LE-NEXT: mulli r6, r6, 1003 +; P8LE-NEXT: mulli r7, r7, 98 +; P8LE-NEXT: srwi r10, r11, 2 +; P8LE-NEXT: mulli r9, r9, 1003 ; P8LE-NEXT: mulli r3, r3, 95 -; P8LE-NEXT: mulli r8, r8, 124 -; P8LE-NEXT: subf r7, r7, r9 -; P8LE-NEXT: subf r6, r6, r10 -; P8LE-NEXT: mtfprd f0, r7 -; P8LE-NEXT: subf r3, r3, r5 -; P8LE-NEXT: subf r4, r8, r4 -; P8LE-NEXT: mtfprd f1, r6 +; P8LE-NEXT: mulli r10, r10, 124 +; P8LE-NEXT: subf r5, r7, r5 +; P8LE-NEXT: subf r7, r9, r8 +; P8LE-NEXT: mtfprd f0, r5 +; P8LE-NEXT: subf r3, r3, r6 +; P8LE-NEXT: subf r4, r10, r4 +; P8LE-NEXT: mtfprd f1, r7 ; P8LE-NEXT: mtfprd f2, r3 ; P8LE-NEXT: xxswapd v2, vs0 ; P8LE-NEXT: mtfprd f3, r4 @@ -182,47 +174,43 @@ ; P8BE: # %bb.0: ; P8BE-NEXT: mfvsrd r4, v2 ; P8BE-NEXT: lis r3, 22765 -; P8BE-NEXT: lis r9, 16727 +; P8BE-NEXT: lis r7, 16727 +; P8BE-NEXT: lis r9, 21399 +; P8BE-NEXT: lis r10, 8456 ; P8BE-NEXT: ori r3, r3, 8969 -; P8BE-NEXT: ori r9, r9, 2287 -; P8BE-NEXT: rldicl r5, r4, 16, 48 -; P8BE-NEXT: clrldi r6, r4, 48 -; P8BE-NEXT: clrlwi r5, r5, 16 -; P8BE-NEXT: rldicl r7, r4, 48, 48 +; P8BE-NEXT: ori r7, r7, 2287 +; P8BE-NEXT: ori r9, r9, 33437 +; P8BE-NEXT: ori r10, r10, 16913 +; P8BE-NEXT: rldicl r6, r4, 16, 48 +; P8BE-NEXT: clrldi r5, r4, 48 ; P8BE-NEXT: clrlwi r6, r6, 16 -; P8BE-NEXT: clrldi r8, r5, 32 -; P8BE-NEXT: clrlwi r7, r7, 16 -; P8BE-NEXT: mulld r3, r8, r3 -; P8BE-NEXT: lis r8, 21399 -; P8BE-NEXT: clrldi r10, r6, 32 -; P8BE-NEXT: ori r8, r8, 33437 -; P8BE-NEXT: clrldi r11, r7, 32 -; P8BE-NEXT: mulld r9, r10, r9 -; P8BE-NEXT: lis r10, 8456 +; P8BE-NEXT: rldicl r8, r4, 48, 48 +; P8BE-NEXT: clrlwi r5, r5, 16 +; P8BE-NEXT: mulhwu r3, r6, r3 ; P8BE-NEXT: rldicl r4, r4, 32, 48 -; P8BE-NEXT: mulld r8, r11, r8 -; P8BE-NEXT: ori r10, r10, 16913 +; P8BE-NEXT: clrlwi r8, r8, 16 +; P8BE-NEXT: mulhwu r7, r5, r7 ; P8BE-NEXT: rlwinm r11, r4, 30, 18, 31 -; P8BE-NEXT: rldicl r3, r3, 32, 32 ; P8BE-NEXT: clrlwi r4, r4, 16 -; P8BE-NEXT: mulld r10, r11, r10 -; P8BE-NEXT: subf r11, r3, r5 +; P8BE-NEXT: mulhwu r9, r8, r9 +; P8BE-NEXT: mulhwu r10, r11, r10 +; P8BE-NEXT: subf r11, r3, r6 ; P8BE-NEXT: srwi r11, r11, 1 -; P8BE-NEXT: rldicl r9, r9, 24, 40 +; P8BE-NEXT: srwi r7, r7, 8 ; P8BE-NEXT: add r3, r11, r3 -; P8BE-NEXT: rldicl r8, r8, 27, 37 +; P8BE-NEXT: srwi r9, r9, 5 +; P8BE-NEXT: srwi r10, r10, 2 +; P8BE-NEXT: mulli r7, r7, 1003 ; P8BE-NEXT: srwi r3, r3, 6 -; P8BE-NEXT: mulli r9, r9, 1003 -; P8BE-NEXT: rldicl r10, r10, 30, 34 -; P8BE-NEXT: mulli r8, r8, 98 +; P8BE-NEXT: mulli r9, r9, 98 ; P8BE-NEXT: mulli r3, r3, 95 ; P8BE-NEXT: mulli r10, r10, 124 -; P8BE-NEXT: subf r6, r9, r6 -; P8BE-NEXT: subf r7, r8, r7 -; P8BE-NEXT: sldi r6, r6, 48 -; P8BE-NEXT: subf r3, r3, r5 +; P8BE-NEXT: subf r5, r7, r5 +; P8BE-NEXT: subf r7, r9, r8 +; P8BE-NEXT: sldi r5, r5, 48 +; P8BE-NEXT: subf r3, r3, r6 ; P8BE-NEXT: subf r4, r10, r4 -; P8BE-NEXT: mtvsrd v2, r6 +; P8BE-NEXT: mtvsrd v2, r5 ; P8BE-NEXT: sldi r5, r7, 48 ; P8BE-NEXT: sldi r3, r3, 48 ; P8BE-NEXT: sldi r4, r4, 48 @@ -242,15 +230,13 @@ ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: lis r5, 22765 +; P9LE-NEXT: ori r5, r5, 8969 ; P9LE-NEXT: clrlwi r4, r3, 16 -; P9LE-NEXT: lis r6, 22765 -; P9LE-NEXT: ori r6, r6, 8969 -; P9LE-NEXT: clrldi r5, r4, 32 -; P9LE-NEXT: mulld r5, r5, r6 -; P9LE-NEXT: rldicl r5, r5, 32, 32 -; P9LE-NEXT: subf r4, r5, r4 +; P9LE-NEXT: mulhwu r6, r4, r5 +; P9LE-NEXT: subf r4, r6, r4 ; P9LE-NEXT: srwi r4, r4, 1 -; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: add r4, r4, r6 ; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 @@ -258,12 +244,10 @@ ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: clrlwi r4, r3, 16 -; P9LE-NEXT: clrldi r5, r4, 32 -; P9LE-NEXT: mulld r5, r5, r6 -; P9LE-NEXT: rldicl r5, r5, 32, 32 -; P9LE-NEXT: subf r4, r5, r4 +; P9LE-NEXT: mulhwu r6, r4, r5 +; P9LE-NEXT: subf r4, r6, r4 ; P9LE-NEXT: srwi r4, r4, 1 -; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: add r4, r4, r6 ; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 @@ -272,12 +256,10 @@ ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: clrlwi r4, r3, 16 -; P9LE-NEXT: clrldi r5, r4, 32 -; P9LE-NEXT: mulld r5, r5, r6 -; P9LE-NEXT: rldicl r5, r5, 32, 32 -; P9LE-NEXT: subf r4, r5, r4 +; P9LE-NEXT: mulhwu r6, r4, r5 +; P9LE-NEXT: subf r4, r6, r4 ; P9LE-NEXT: srwi r4, r4, 1 -; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: add r4, r4, r6 ; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 @@ -286,9 +268,7 @@ ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: clrlwi r4, r3, 16 -; P9LE-NEXT: clrldi r5, r4, 32 -; P9LE-NEXT: mulld r5, r5, r6 -; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: mulhwu r5, r4, r5 ; P9LE-NEXT: subf r4, r5, r4 ; P9LE-NEXT: srwi r4, r4, 1 ; P9LE-NEXT: add r4, r4, r5 @@ -307,55 +287,47 @@ ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: lis r4, 22765 +; P9BE-NEXT: ori r4, r4, 8969 ; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: lis r5, 22765 -; P9BE-NEXT: ori r5, r5, 8969 -; P9BE-NEXT: clrldi r4, r3, 32 -; P9BE-NEXT: mulld r4, r4, r5 -; P9BE-NEXT: rldicl r4, r4, 32, 32 -; P9BE-NEXT: subf r6, r4, r3 +; P9BE-NEXT: mulhwu r5, r3, r4 +; P9BE-NEXT: subf r6, r5, r3 ; P9BE-NEXT: srwi r6, r6, 1 -; P9BE-NEXT: add r4, r6, r4 -; P9BE-NEXT: srwi r4, r4, 6 -; P9BE-NEXT: mulli r4, r4, 95 -; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: add r5, r6, r5 +; P9BE-NEXT: srwi r5, r5, 6 +; P9BE-NEXT: mulli r5, r5, 95 +; P9BE-NEXT: subf r3, r5, r3 ; P9BE-NEXT: sldi r3, r3, 48 ; P9BE-NEXT: mtvsrd v3, r3 ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: clrldi r4, r3, 32 -; P9BE-NEXT: mulld r4, r4, r5 -; P9BE-NEXT: rldicl r4, r4, 32, 32 -; P9BE-NEXT: subf r6, r4, r3 +; P9BE-NEXT: mulhwu r5, r3, r4 +; P9BE-NEXT: subf r6, r5, r3 ; P9BE-NEXT: srwi r6, r6, 1 -; P9BE-NEXT: add r4, r6, r4 -; P9BE-NEXT: srwi r4, r4, 6 -; P9BE-NEXT: mulli r4, r4, 95 -; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: add r5, r6, r5 +; P9BE-NEXT: srwi r5, r5, 6 +; P9BE-NEXT: mulli r5, r5, 95 +; P9BE-NEXT: subf r3, r5, r3 ; P9BE-NEXT: sldi r3, r3, 48 ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: clrldi r4, r3, 32 -; P9BE-NEXT: mulld r4, r4, r5 -; P9BE-NEXT: rldicl r4, r4, 32, 32 -; P9BE-NEXT: subf r6, r4, r3 +; P9BE-NEXT: mulhwu r5, r3, r4 +; P9BE-NEXT: subf r6, r5, r3 ; P9BE-NEXT: srwi r6, r6, 1 -; P9BE-NEXT: add r4, r6, r4 -; P9BE-NEXT: srwi r4, r4, 6 -; P9BE-NEXT: mulli r4, r4, 95 -; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: add r5, r6, r5 +; P9BE-NEXT: srwi r5, r5, 6 +; P9BE-NEXT: mulli r5, r5, 95 +; P9BE-NEXT: subf r3, r5, r3 ; P9BE-NEXT: sldi r3, r3, 48 ; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: clrldi r4, r3, 32 -; P9BE-NEXT: mulld r4, r4, r5 -; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: mulhwu r4, r3, r4 ; P9BE-NEXT: subf r5, r4, r3 ; P9BE-NEXT: srwi r5, r5, 1 ; P9BE-NEXT: add r4, r5, r4 @@ -371,62 +343,52 @@ ; P8LE-LABEL: fold_urem_vec_2: ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 -; P8LE-NEXT: lis r4, 22765 -; P8LE-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; P8LE-NEXT: lis r3, 22765 ; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; P8LE-NEXT: ori r4, r4, 8969 -; P8LE-NEXT: mffprd r5, f0 -; P8LE-NEXT: clrldi r3, r5, 48 -; P8LE-NEXT: rldicl r6, r5, 48, 48 -; P8LE-NEXT: clrlwi r8, r3, 16 -; P8LE-NEXT: rldicl r7, r5, 32, 48 +; P8LE-NEXT: ori r3, r3, 8969 +; P8LE-NEXT: mffprd r4, f0 +; P8LE-NEXT: clrldi r5, r4, 48 +; P8LE-NEXT: rldicl r6, r4, 48, 48 +; P8LE-NEXT: clrlwi r8, r5, 16 +; P8LE-NEXT: rldicl r7, r4, 32, 48 ; P8LE-NEXT: clrlwi r9, r6, 16 -; P8LE-NEXT: rldicl r5, r5, 16, 48 -; P8LE-NEXT: clrldi r11, r8, 32 -; P8LE-NEXT: clrlwi r10, r7, 16 -; P8LE-NEXT: clrlwi r12, r5, 16 -; P8LE-NEXT: mulld r11, r11, r4 -; P8LE-NEXT: clrldi r0, r9, 32 -; P8LE-NEXT: clrldi r30, r10, 32 -; P8LE-NEXT: clrldi r29, r12, 32 -; P8LE-NEXT: mulld r0, r0, r4 -; P8LE-NEXT: mulld r30, r30, r4 -; P8LE-NEXT: mulld r4, r29, r4 -; P8LE-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; P8LE-NEXT: rldicl r11, r11, 32, 32 -; P8LE-NEXT: subf r8, r11, r8 -; P8LE-NEXT: rldicl r0, r0, 32, 32 +; P8LE-NEXT: rldicl r4, r4, 16, 48 +; P8LE-NEXT: mulhwu r10, r8, r3 +; P8LE-NEXT: clrlwi r11, r7, 16 +; P8LE-NEXT: clrlwi r0, r4, 16 +; P8LE-NEXT: mulhwu r12, r9, r3 +; P8LE-NEXT: mulhwu r30, r11, r3 +; P8LE-NEXT: mulhwu r3, r0, r3 +; P8LE-NEXT: subf r8, r10, r8 ; P8LE-NEXT: srwi r8, r8, 1 -; P8LE-NEXT: rldicl r30, r30, 32, 32 -; P8LE-NEXT: rldicl r4, r4, 32, 32 -; P8LE-NEXT: subf r9, r0, r9 -; P8LE-NEXT: add r8, r8, r11 -; P8LE-NEXT: subf r10, r30, r10 -; P8LE-NEXT: subf r11, r4, r12 +; P8LE-NEXT: subf r9, r12, r9 +; P8LE-NEXT: add r8, r8, r10 +; P8LE-NEXT: subf r10, r30, r11 +; P8LE-NEXT: subf r11, r3, r0 ; P8LE-NEXT: srwi r9, r9, 1 -; P8LE-NEXT: srwi r8, r8, 6 ; P8LE-NEXT: srwi r10, r10, 1 ; P8LE-NEXT: srwi r11, r11, 1 -; P8LE-NEXT: add r9, r9, r0 +; P8LE-NEXT: add r9, r9, r12 +; P8LE-NEXT: srwi r8, r8, 6 ; P8LE-NEXT: add r10, r10, r30 -; P8LE-NEXT: add r4, r11, r4 +; P8LE-NEXT: add r3, r11, r3 ; P8LE-NEXT: srwi r9, r9, 6 ; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; P8LE-NEXT: mulli r8, r8, 95 ; P8LE-NEXT: srwi r10, r10, 6 -; P8LE-NEXT: srwi r4, r4, 6 +; P8LE-NEXT: srwi r3, r3, 6 ; P8LE-NEXT: mulli r9, r9, 95 ; P8LE-NEXT: mulli r10, r10, 95 -; P8LE-NEXT: mulli r4, r4, 95 -; P8LE-NEXT: subf r3, r8, r3 +; P8LE-NEXT: mulli r3, r3, 95 +; P8LE-NEXT: subf r5, r8, r5 ; P8LE-NEXT: subf r6, r9, r6 -; P8LE-NEXT: mtfprd f0, r3 -; P8LE-NEXT: subf r3, r10, r7 -; P8LE-NEXT: subf r4, r4, r5 +; P8LE-NEXT: mtfprd f0, r5 +; P8LE-NEXT: subf r5, r10, r7 +; P8LE-NEXT: subf r3, r3, r4 ; P8LE-NEXT: mtfprd f1, r6 -; P8LE-NEXT: mtfprd f2, r3 +; P8LE-NEXT: mtfprd f2, r5 ; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: mtfprd f3, r4 +; P8LE-NEXT: mtfprd f3, r3 ; P8LE-NEXT: xxswapd v3, vs1 ; P8LE-NEXT: xxswapd v4, vs2 ; P8LE-NEXT: xxswapd v5, vs3 @@ -445,24 +407,16 @@ ; P8BE-NEXT: clrlwi r5, r5, 16 ; P8BE-NEXT: rldicl r7, r4, 32, 48 ; P8BE-NEXT: clrlwi r6, r6, 16 -; P8BE-NEXT: clrldi r8, r5, 32 +; P8BE-NEXT: mulhwu r8, r5, r3 ; P8BE-NEXT: rldicl r4, r4, 16, 48 ; P8BE-NEXT: clrlwi r7, r7, 16 -; P8BE-NEXT: clrldi r9, r6, 32 -; P8BE-NEXT: mulld r8, r8, r3 +; P8BE-NEXT: mulhwu r9, r6, r3 ; P8BE-NEXT: clrlwi r4, r4, 16 -; P8BE-NEXT: clrldi r10, r7, 32 -; P8BE-NEXT: mulld r9, r9, r3 -; P8BE-NEXT: clrldi r11, r4, 32 -; P8BE-NEXT: mulld r10, r10, r3 -; P8BE-NEXT: mulld r3, r11, r3 -; P8BE-NEXT: rldicl r8, r8, 32, 32 -; P8BE-NEXT: rldicl r9, r9, 32, 32 +; P8BE-NEXT: mulhwu r10, r7, r3 +; P8BE-NEXT: mulhwu r3, r4, r3 ; P8BE-NEXT: subf r11, r8, r5 -; P8BE-NEXT: rldicl r10, r10, 32, 32 ; P8BE-NEXT: subf r12, r9, r6 ; P8BE-NEXT: srwi r11, r11, 1 -; P8BE-NEXT: rldicl r3, r3, 32, 32 ; P8BE-NEXT: add r8, r11, r8 ; P8BE-NEXT: subf r11, r10, r7 ; P8BE-NEXT: srwi r12, r12, 1 @@ -507,39 +461,33 @@ ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: vextuhrx r3, r3, v2 +; P9LE-NEXT: lis r5, 22765 +; P9LE-NEXT: ori r5, r5, 8969 ; P9LE-NEXT: clrlwi r4, r3, 16 -; P9LE-NEXT: lis r6, 22765 -; P9LE-NEXT: ori r6, r6, 8969 -; P9LE-NEXT: clrldi r5, r4, 32 -; P9LE-NEXT: mulld r5, r5, r6 -; P9LE-NEXT: rldicl r5, r5, 32, 32 -; P9LE-NEXT: subf r4, r5, r4 +; P9LE-NEXT: mulhwu r6, r4, r5 +; P9LE-NEXT: subf r4, r6, r4 ; P9LE-NEXT: srwi r4, r4, 1 -; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: add r4, r4, r6 ; P9LE-NEXT: srwi r4, r4, 6 -; P9LE-NEXT: mulli r5, r4, 95 -; P9LE-NEXT: subf r3, r5, r3 +; P9LE-NEXT: mulli r6, r4, 95 +; P9LE-NEXT: subf r3, r6, r3 ; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r5, r3, 16 -; P9LE-NEXT: clrldi r7, r5, 32 -; P9LE-NEXT: mulld r7, r7, r6 -; P9LE-NEXT: rldicl r7, r7, 32, 32 -; P9LE-NEXT: subf r5, r7, r5 -; P9LE-NEXT: srwi r5, r5, 1 -; P9LE-NEXT: add r5, r5, r7 -; P9LE-NEXT: srwi r5, r5, 6 -; P9LE-NEXT: mulli r7, r5, 95 +; P9LE-NEXT: clrlwi r6, r3, 16 +; P9LE-NEXT: mulhwu r7, r6, r5 +; P9LE-NEXT: subf r6, r7, r6 +; P9LE-NEXT: srwi r6, r6, 1 +; P9LE-NEXT: add r6, r6, r7 +; P9LE-NEXT: srwi r6, r6, 6 +; P9LE-NEXT: mulli r7, r6, 95 ; P9LE-NEXT: subf r3, r7, r3 ; P9LE-NEXT: xxswapd v3, vs0 ; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: clrlwi r7, r3, 16 -; P9LE-NEXT: clrldi r8, r7, 32 -; P9LE-NEXT: mulld r8, r8, r6 -; P9LE-NEXT: rldicl r8, r8, 32, 32 +; P9LE-NEXT: mulhwu r8, r7, r5 ; P9LE-NEXT: subf r7, r8, r7 ; P9LE-NEXT: srwi r7, r7, 1 ; P9LE-NEXT: add r7, r7, r8 @@ -551,14 +499,12 @@ ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: clrlwi r8, r3, 16 -; P9LE-NEXT: clrldi r9, r8, 32 -; P9LE-NEXT: mulld r6, r9, r6 -; P9LE-NEXT: rldicl r6, r6, 32, 32 -; P9LE-NEXT: subf r8, r6, r8 +; P9LE-NEXT: mulhwu r5, r8, r5 +; P9LE-NEXT: subf r8, r5, r8 ; P9LE-NEXT: srwi r8, r8, 1 -; P9LE-NEXT: add r6, r8, r6 -; P9LE-NEXT: srwi r6, r6, 6 -; P9LE-NEXT: mulli r8, r6, 95 +; P9LE-NEXT: add r5, r8, r5 +; P9LE-NEXT: srwi r5, r5, 6 +; P9LE-NEXT: mulli r8, r5, 95 ; P9LE-NEXT: subf r3, r8, r3 ; P9LE-NEXT: vmrglh v3, v4, v3 ; P9LE-NEXT: xxswapd v4, vs0 @@ -568,12 +514,12 @@ ; P9LE-NEXT: vmrglh v2, v2, v4 ; P9LE-NEXT: vmrglw v2, v2, v3 ; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtfprd f0, r5 +; P9LE-NEXT: mtfprd f0, r6 ; P9LE-NEXT: xxswapd v4, vs0 ; P9LE-NEXT: mtfprd f0, r7 ; P9LE-NEXT: vmrglh v3, v4, v3 ; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtfprd f0, r6 +; P9LE-NEXT: mtfprd f0, r5 ; P9LE-NEXT: xxswapd v5, vs0 ; P9LE-NEXT: vmrglh v4, v5, v4 ; P9LE-NEXT: vmrglw v3, v4, v3 @@ -584,40 +530,34 @@ ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: lis r5, 22765 +; P9BE-NEXT: ori r5, r5, 8969 ; P9BE-NEXT: clrlwi r4, r3, 16 -; P9BE-NEXT: lis r6, 22765 -; P9BE-NEXT: ori r6, r6, 8969 -; P9BE-NEXT: clrldi r5, r4, 32 -; P9BE-NEXT: mulld r5, r5, r6 -; P9BE-NEXT: rldicl r5, r5, 32, 32 -; P9BE-NEXT: subf r4, r5, r4 +; P9BE-NEXT: mulhwu r6, r4, r5 +; P9BE-NEXT: subf r4, r6, r4 ; P9BE-NEXT: srwi r4, r4, 1 -; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: add r4, r4, r6 ; P9BE-NEXT: srwi r4, r4, 6 -; P9BE-NEXT: mulli r5, r4, 95 -; P9BE-NEXT: subf r3, r5, r3 +; P9BE-NEXT: mulli r6, r4, 95 +; P9BE-NEXT: subf r3, r6, r3 ; P9BE-NEXT: sldi r3, r3, 48 ; P9BE-NEXT: mtvsrd v3, r3 ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r5, r3, 16 -; P9BE-NEXT: clrldi r7, r5, 32 -; P9BE-NEXT: mulld r7, r7, r6 -; P9BE-NEXT: rldicl r7, r7, 32, 32 -; P9BE-NEXT: subf r5, r7, r5 -; P9BE-NEXT: srwi r5, r5, 1 -; P9BE-NEXT: add r5, r5, r7 -; P9BE-NEXT: srwi r5, r5, 6 -; P9BE-NEXT: mulli r7, r5, 95 +; P9BE-NEXT: clrlwi r6, r3, 16 +; P9BE-NEXT: mulhwu r7, r6, r5 +; P9BE-NEXT: subf r6, r7, r6 +; P9BE-NEXT: srwi r6, r6, 1 +; P9BE-NEXT: add r6, r6, r7 +; P9BE-NEXT: srwi r6, r6, 6 +; P9BE-NEXT: mulli r7, r6, 95 ; P9BE-NEXT: subf r3, r7, r3 ; P9BE-NEXT: sldi r3, r3, 48 ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r7, r3, 16 -; P9BE-NEXT: clrldi r8, r7, 32 -; P9BE-NEXT: mulld r8, r8, r6 -; P9BE-NEXT: rldicl r8, r8, 32, 32 +; P9BE-NEXT: mulhwu r8, r7, r5 ; P9BE-NEXT: subf r7, r8, r7 ; P9BE-NEXT: srwi r7, r7, 1 ; P9BE-NEXT: add r7, r7, r8 @@ -630,14 +570,12 @@ ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: clrldi r8, r3, 32 -; P9BE-NEXT: mulld r6, r8, r6 -; P9BE-NEXT: rldicl r6, r6, 32, 32 -; P9BE-NEXT: subf r8, r6, r3 +; P9BE-NEXT: mulhwu r5, r3, r5 +; P9BE-NEXT: subf r8, r5, r3 ; P9BE-NEXT: srwi r8, r8, 1 -; P9BE-NEXT: add r6, r8, r6 -; P9BE-NEXT: srwi r6, r6, 6 -; P9BE-NEXT: mulli r8, r6, 95 +; P9BE-NEXT: add r5, r8, r5 +; P9BE-NEXT: srwi r5, r5, 6 +; P9BE-NEXT: mulli r8, r5, 95 ; P9BE-NEXT: subf r3, r8, r3 ; P9BE-NEXT: sldi r3, r3, 48 ; P9BE-NEXT: mtvsrd v2, r3 @@ -645,12 +583,12 @@ ; P9BE-NEXT: vmrghh v2, v2, v4 ; P9BE-NEXT: vmrghw v2, v2, v3 ; P9BE-NEXT: mtvsrd v3, r3 -; P9BE-NEXT: sldi r3, r5, 48 +; P9BE-NEXT: sldi r3, r6, 48 ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: sldi r3, r7, 48 ; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: sldi r3, r6, 48 +; P9BE-NEXT: sldi r3, r5, 48 ; P9BE-NEXT: mtvsrd v5, r3 ; P9BE-NEXT: vmrghh v4, v5, v4 ; P9BE-NEXT: vmrghw v3, v4, v3 @@ -660,68 +598,58 @@ ; P8LE-LABEL: combine_urem_udiv: ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 -; P8LE-NEXT: lis r5, 22765 +; P8LE-NEXT: lis r4, 22765 ; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; P8LE-NEXT: std r29, -24(r1) # 8-byte Folded Spill -; P8LE-NEXT: ori r5, r5, 8969 -; P8LE-NEXT: mffprd r6, f0 -; P8LE-NEXT: clrldi r3, r6, 48 -; P8LE-NEXT: rldicl r4, r6, 48, 48 -; P8LE-NEXT: rldicl r7, r6, 32, 48 +; P8LE-NEXT: ori r4, r4, 8969 +; P8LE-NEXT: mffprd r5, f0 +; P8LE-NEXT: clrldi r3, r5, 48 +; P8LE-NEXT: rldicl r6, r5, 48, 48 ; P8LE-NEXT: clrlwi r8, r3, 16 -; P8LE-NEXT: clrlwi r9, r4, 16 -; P8LE-NEXT: rldicl r6, r6, 16, 48 -; P8LE-NEXT: clrlwi r10, r7, 16 -; P8LE-NEXT: clrldi r11, r8, 32 -; P8LE-NEXT: clrlwi r12, r6, 16 -; P8LE-NEXT: clrldi r0, r9, 32 -; P8LE-NEXT: clrldi r30, r10, 32 -; P8LE-NEXT: mulld r11, r11, r5 -; P8LE-NEXT: clrldi r29, r12, 32 -; P8LE-NEXT: mulld r0, r0, r5 -; P8LE-NEXT: mulld r30, r30, r5 -; P8LE-NEXT: mulld r5, r29, r5 -; P8LE-NEXT: ld r29, -24(r1) # 8-byte Folded Reload -; P8LE-NEXT: rldicl r11, r11, 32, 32 -; P8LE-NEXT: rldicl r0, r0, 32, 32 -; P8LE-NEXT: rldicl r30, r30, 32, 32 -; P8LE-NEXT: subf r8, r11, r8 -; P8LE-NEXT: rldicl r5, r5, 32, 32 -; P8LE-NEXT: subf r9, r0, r9 +; P8LE-NEXT: rldicl r7, r5, 32, 48 +; P8LE-NEXT: clrlwi r9, r6, 16 +; P8LE-NEXT: mulhwu r10, r8, r4 +; P8LE-NEXT: clrlwi r11, r7, 16 +; P8LE-NEXT: rldicl r5, r5, 16, 48 +; P8LE-NEXT: mulhwu r12, r9, r4 +; P8LE-NEXT: mulhwu r0, r11, r4 +; P8LE-NEXT: clrlwi r30, r5, 16 +; P8LE-NEXT: mulhwu r4, r30, r4 +; P8LE-NEXT: subf r8, r10, r8 ; P8LE-NEXT: srwi r8, r8, 1 -; P8LE-NEXT: subf r10, r30, r10 -; P8LE-NEXT: add r8, r8, r11 +; P8LE-NEXT: subf r9, r12, r9 +; P8LE-NEXT: add r8, r8, r10 +; P8LE-NEXT: subf r10, r0, r11 ; P8LE-NEXT: srwi r9, r9, 1 ; P8LE-NEXT: srwi r10, r10, 1 -; P8LE-NEXT: subf r11, r5, r12 -; P8LE-NEXT: add r9, r9, r0 +; P8LE-NEXT: subf r11, r4, r30 +; P8LE-NEXT: add r9, r9, r12 ; P8LE-NEXT: srwi r8, r8, 6 -; P8LE-NEXT: add r10, r10, r30 -; P8LE-NEXT: srwi r11, r11, 1 ; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload +; P8LE-NEXT: add r10, r10, r0 +; P8LE-NEXT: srwi r11, r11, 1 ; P8LE-NEXT: srwi r9, r9, 6 +; P8LE-NEXT: mtfprd f0, r8 ; P8LE-NEXT: mulli r12, r8, 95 ; P8LE-NEXT: srwi r10, r10, 6 -; P8LE-NEXT: add r5, r11, r5 -; P8LE-NEXT: mtfprd f0, r8 -; P8LE-NEXT: mulli r8, r9, 95 +; P8LE-NEXT: add r4, r11, r4 ; P8LE-NEXT: mtfprd f1, r9 +; P8LE-NEXT: mulli r8, r9, 95 ; P8LE-NEXT: mulli r9, r10, 95 -; P8LE-NEXT: srwi r5, r5, 6 -; P8LE-NEXT: mtfprd f3, r5 -; P8LE-NEXT: mulli r5, r5, 95 +; P8LE-NEXT: srwi r4, r4, 6 ; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: xxswapd v3, vs1 ; P8LE-NEXT: mtfprd f2, r10 +; P8LE-NEXT: mtfprd f3, r4 +; P8LE-NEXT: mulli r4, r4, 95 +; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: xxswapd v1, vs2 ; P8LE-NEXT: subf r3, r12, r3 ; P8LE-NEXT: xxswapd v6, vs3 ; P8LE-NEXT: mtfprd f0, r3 ; P8LE-NEXT: subf r3, r9, r7 -; P8LE-NEXT: subf r4, r8, r4 -; P8LE-NEXT: xxswapd v1, vs2 +; P8LE-NEXT: subf r6, r8, r6 ; P8LE-NEXT: mtfprd f4, r3 -; P8LE-NEXT: subf r3, r5, r6 -; P8LE-NEXT: mtfprd f1, r4 +; P8LE-NEXT: subf r3, r4, r5 +; P8LE-NEXT: mtfprd f1, r6 ; P8LE-NEXT: mtfprd f5, r3 ; P8LE-NEXT: xxswapd v5, vs4 ; P8LE-NEXT: vmrglh v2, v3, v2 @@ -738,71 +666,61 @@ ; ; P8BE-LABEL: combine_urem_udiv: ; P8BE: # %bb.0: -; P8BE-NEXT: mfvsrd r6, v2 -; P8BE-NEXT: lis r5, 22765 -; P8BE-NEXT: std r30, -16(r1) # 8-byte Folded Spill -; P8BE-NEXT: ori r5, r5, 8969 -; P8BE-NEXT: clrldi r3, r6, 48 -; P8BE-NEXT: rldicl r4, r6, 48, 48 +; P8BE-NEXT: mfvsrd r5, v2 +; P8BE-NEXT: lis r4, 22765 +; P8BE-NEXT: ori r4, r4, 8969 +; P8BE-NEXT: clrldi r3, r5, 48 +; P8BE-NEXT: rldicl r6, r5, 48, 48 ; P8BE-NEXT: clrlwi r8, r3, 16 -; P8BE-NEXT: rldicl r7, r6, 32, 48 -; P8BE-NEXT: clrlwi r9, r4, 16 -; P8BE-NEXT: rldicl r6, r6, 16, 48 -; P8BE-NEXT: clrldi r11, r8, 32 -; P8BE-NEXT: clrlwi r10, r7, 16 -; P8BE-NEXT: clrlwi r6, r6, 16 -; P8BE-NEXT: clrldi r12, r9, 32 -; P8BE-NEXT: mulld r11, r11, r5 -; P8BE-NEXT: clrldi r0, r10, 32 -; P8BE-NEXT: clrldi r30, r6, 32 -; P8BE-NEXT: mulld r12, r12, r5 -; P8BE-NEXT: mulld r0, r0, r5 -; P8BE-NEXT: mulld r5, r30, r5 -; P8BE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload -; P8BE-NEXT: rldicl r11, r11, 32, 32 -; P8BE-NEXT: rldicl r12, r12, 32, 32 -; P8BE-NEXT: subf r8, r11, r8 -; P8BE-NEXT: rldicl r5, r5, 32, 32 +; P8BE-NEXT: rldicl r7, r5, 32, 48 +; P8BE-NEXT: clrlwi r9, r6, 16 +; P8BE-NEXT: rldicl r5, r5, 16, 48 +; P8BE-NEXT: mulhwu r10, r8, r4 +; P8BE-NEXT: clrlwi r11, r7, 16 +; P8BE-NEXT: mulhwu r12, r9, r4 +; P8BE-NEXT: clrlwi r5, r5, 16 +; P8BE-NEXT: mulhwu r0, r11, r4 +; P8BE-NEXT: mulhwu r4, r5, r4 +; P8BE-NEXT: subf r8, r10, r8 ; P8BE-NEXT: subf r9, r12, r9 ; P8BE-NEXT: srwi r8, r8, 1 -; P8BE-NEXT: rldicl r0, r0, 32, 32 -; P8BE-NEXT: add r8, r8, r11 +; P8BE-NEXT: add r8, r8, r10 +; P8BE-NEXT: subf r10, r0, r11 ; P8BE-NEXT: srwi r9, r9, 1 -; P8BE-NEXT: subf r11, r5, r6 -; P8BE-NEXT: subf r10, r0, r10 +; P8BE-NEXT: subf r11, r4, r5 ; P8BE-NEXT: add r9, r9, r12 ; P8BE-NEXT: srwi r8, r8, 6 ; P8BE-NEXT: srwi r11, r11, 1 ; P8BE-NEXT: srwi r10, r10, 1 ; P8BE-NEXT: srwi r9, r9, 6 -; P8BE-NEXT: add r5, r11, r5 ; P8BE-NEXT: mulli r12, r8, 95 +; P8BE-NEXT: add r4, r11, r4 ; P8BE-NEXT: add r10, r10, r0 -; P8BE-NEXT: srwi r5, r5, 6 ; P8BE-NEXT: mulli r11, r9, 95 -; P8BE-NEXT: sldi r9, r9, 48 +; P8BE-NEXT: srwi r4, r4, 6 ; P8BE-NEXT: srwi r10, r10, 6 +; P8BE-NEXT: sldi r9, r9, 48 ; P8BE-NEXT: sldi r8, r8, 48 ; P8BE-NEXT: mtvsrd v3, r9 -; P8BE-NEXT: mulli r9, r5, 95 +; P8BE-NEXT: mulli r9, r4, 95 ; P8BE-NEXT: mtvsrd v2, r8 ; P8BE-NEXT: mulli r8, r10, 95 -; P8BE-NEXT: sldi r10, r10, 48 ; P8BE-NEXT: subf r3, r12, r3 -; P8BE-NEXT: mtvsrd v4, r10 -; P8BE-NEXT: subf r4, r11, r4 +; P8BE-NEXT: subf r6, r11, r6 ; P8BE-NEXT: sldi r3, r3, 48 ; P8BE-NEXT: vmrghh v2, v3, v2 -; P8BE-NEXT: sldi r4, r4, 48 +; P8BE-NEXT: sldi r6, r6, 48 +; P8BE-NEXT: sldi r10, r10, 48 ; P8BE-NEXT: mtvsrd v3, r3 -; P8BE-NEXT: subf r3, r9, r6 +; P8BE-NEXT: subf r3, r9, r5 ; P8BE-NEXT: subf r7, r8, r7 -; P8BE-NEXT: mtvsrd v5, r4 +; P8BE-NEXT: mtvsrd v5, r6 ; P8BE-NEXT: sldi r3, r3, 48 -; P8BE-NEXT: sldi r6, r7, 48 +; P8BE-NEXT: sldi r5, r7, 48 ; P8BE-NEXT: mtvsrd v1, r3 -; P8BE-NEXT: sldi r3, r5, 48 -; P8BE-NEXT: mtvsrd v0, r6 +; P8BE-NEXT: sldi r3, r4, 48 +; P8BE-NEXT: mtvsrd v4, r10 +; P8BE-NEXT: mtvsrd v0, r5 ; P8BE-NEXT: vmrghh v3, v5, v3 ; P8BE-NEXT: mtvsrd v5, r3 ; P8BE-NEXT: vmrghh v0, v1, v0 @@ -832,14 +750,11 @@ ; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r4, r3, 16 -; P9LE-NEXT: lis r6, 22765 -; P9LE-NEXT: ori r6, r6, 8969 +; P9LE-NEXT: lis r5, 22765 +; P9LE-NEXT: ori r5, r5, 8969 ; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: clrldi r5, r4, 32 -; P9LE-NEXT: mulld r5, r5, r6 -; P9LE-NEXT: rldicl r5, r5, 32, 32 +; P9LE-NEXT: clrlwi r4, r3, 16 +; P9LE-NEXT: mulhwu r5, r4, r5 ; P9LE-NEXT: subf r4, r5, r4 ; P9LE-NEXT: srwi r4, r4, 1 ; P9LE-NEXT: add r4, r4, r5 @@ -850,6 +765,7 @@ ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: clrlwi r3, r3, 29 +; P9LE-NEXT: vmrglh v3, v4, v3 ; P9LE-NEXT: xxswapd v4, vs0 ; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: xxswapd v2, vs0 @@ -871,13 +787,11 @@ ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: lis r5, 22765 -; P9BE-NEXT: ori r5, r5, 8969 +; P9BE-NEXT: lis r4, 22765 +; P9BE-NEXT: ori r4, r4, 8969 ; P9BE-NEXT: vmrghh v3, v4, v3 -; P9BE-NEXT: clrldi r4, r3, 32 -; P9BE-NEXT: mulld r4, r4, r5 -; P9BE-NEXT: rldicl r4, r4, 32, 32 +; P9BE-NEXT: clrlwi r3, r3, 16 +; P9BE-NEXT: mulhwu r4, r3, r4 ; P9BE-NEXT: subf r5, r4, r3 ; P9BE-NEXT: srwi r5, r5, 1 ; P9BE-NEXT: add r4, r5, r4 @@ -902,28 +816,26 @@ ; P8LE-NEXT: ori r3, r3, 8969 ; P8LE-NEXT: mffprd r4, f0 ; P8LE-NEXT: rldicl r5, r4, 16, 48 -; P8LE-NEXT: clrlwi r6, r5, 16 -; P8LE-NEXT: clrldi r7, r6, 32 -; P8LE-NEXT: mulld r3, r7, r3 ; P8LE-NEXT: rldicl r7, r4, 48, 48 -; P8LE-NEXT: clrlwi r7, r7, 27 -; P8LE-NEXT: mtfprd f1, r7 -; P8LE-NEXT: rldicl r3, r3, 32, 32 -; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: clrlwi r6, r5, 16 +; P8LE-NEXT: mulhwu r3, r6, r3 ; P8LE-NEXT: subf r6, r3, r6 ; P8LE-NEXT: srwi r6, r6, 1 ; P8LE-NEXT: add r3, r6, r3 ; P8LE-NEXT: clrldi r6, r4, 48 ; P8LE-NEXT: srwi r3, r3, 6 -; P8LE-NEXT: rldicl r4, r4, 32, 48 ; P8LE-NEXT: clrlwi r6, r6, 26 ; P8LE-NEXT: mulli r3, r3, 95 -; P8LE-NEXT: clrlwi r4, r4, 29 +; P8LE-NEXT: rldicl r4, r4, 32, 48 ; P8LE-NEXT: mtfprd f0, r6 +; P8LE-NEXT: clrlwi r6, r7, 27 +; P8LE-NEXT: clrlwi r4, r4, 29 +; P8LE-NEXT: mtfprd f1, r6 ; P8LE-NEXT: mtfprd f3, r4 ; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: xxswapd v5, vs3 +; P8LE-NEXT: xxswapd v3, vs1 ; P8LE-NEXT: subf r3, r3, r5 +; P8LE-NEXT: xxswapd v5, vs3 ; P8LE-NEXT: mtfprd f2, r3 ; P8LE-NEXT: vmrglh v2, v3, v2 ; P8LE-NEXT: xxswapd v4, vs2 @@ -940,9 +852,7 @@ ; P8BE-NEXT: rldicl r7, r4, 16, 48 ; P8BE-NEXT: clrlwi r5, r5, 16 ; P8BE-NEXT: clrlwi r7, r7, 26 -; P8BE-NEXT: clrldi r6, r5, 32 -; P8BE-NEXT: mulld r3, r6, r3 -; P8BE-NEXT: rldicl r3, r3, 32, 32 +; P8BE-NEXT: mulhwu r3, r5, r3 ; P8BE-NEXT: subf r6, r3, r5 ; P8BE-NEXT: srwi r6, r6, 1 ; P8BE-NEXT: add r3, r6, r3 @@ -974,25 +884,24 @@ ; P9LE-LABEL: dont_fold_urem_one: ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 4 -; P9LE-NEXT: li r5, 0 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: oris r6, r5, 45590 -; P9LE-NEXT: oris r5, r5, 51306 -; P9LE-NEXT: ori r6, r6, 17097 -; P9LE-NEXT: ori r5, r5, 30865 -; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 -; P9LE-NEXT: mulld r4, r4, r6 -; P9LE-NEXT: lis r6, 24749 -; P9LE-NEXT: ori r6, r6, 47143 -; P9LE-NEXT: rldicl r4, r4, 28, 36 +; P9LE-NEXT: lis r5, -19946 +; P9LE-NEXT: ori r5, r5, 17097 +; P9LE-NEXT: clrlwi r4, r3, 16 +; P9LE-NEXT: mulhwu r4, r4, r5 +; P9LE-NEXT: lis r5, 24749 +; P9LE-NEXT: ori r5, r5, 47143 +; P9LE-NEXT: srwi r4, r4, 4 ; P9LE-NEXT: mulli r4, r4, 23 ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 -; P9LE-NEXT: mulld r4, r4, r6 -; P9LE-NEXT: rldicl r4, r4, 21, 43 +; P9LE-NEXT: clrlwi r4, r3, 16 +; P9LE-NEXT: mulhwu r4, r4, r5 +; P9LE-NEXT: lis r5, -14230 +; P9LE-NEXT: ori r5, r5, 30865 +; P9LE-NEXT: srwi r4, r4, 11 ; P9LE-NEXT: mulli r4, r4, 5423 ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: xxswapd v3, vs0 @@ -1000,8 +909,8 @@ ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: rlwinm r4, r3, 31, 17, 31 -; P9LE-NEXT: mulld r4, r4, r5 -; P9LE-NEXT: rldicl r4, r4, 24, 40 +; P9LE-NEXT: mulhwu r4, r4, r5 +; P9LE-NEXT: srwi r4, r4, 8 ; P9LE-NEXT: mulli r4, r4, 654 ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: xxswapd v4, vs0 @@ -1017,44 +926,41 @@ ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: lis r4, 24749 +; P9BE-NEXT: ori r4, r4, 47143 ; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: lis r5, 24749 -; P9BE-NEXT: ori r5, r5, 47143 -; P9BE-NEXT: clrldi r4, r3, 32 -; P9BE-NEXT: mulld r4, r4, r5 -; P9BE-NEXT: li r5, 0 -; P9BE-NEXT: oris r6, r5, 45590 -; P9BE-NEXT: oris r5, r5, 51306 -; P9BE-NEXT: ori r6, r6, 17097 -; P9BE-NEXT: ori r5, r5, 30865 -; P9BE-NEXT: rldicl r4, r4, 21, 43 +; P9BE-NEXT: mulhwu r4, r3, r4 +; P9BE-NEXT: srwi r4, r4, 11 ; P9BE-NEXT: mulli r4, r4, 5423 ; P9BE-NEXT: subf r3, r4, r3 +; P9BE-NEXT: lis r4, -19946 ; P9BE-NEXT: sldi r3, r3, 48 ; P9BE-NEXT: mtvsrd v3, r3 ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: clrldi r4, r3, 32 -; P9BE-NEXT: mulld r4, r4, r6 -; P9BE-NEXT: rldicl r4, r4, 28, 36 +; P9BE-NEXT: ori r4, r4, 17097 +; P9BE-NEXT: mulhwu r4, r3, r4 +; P9BE-NEXT: srwi r4, r4, 4 ; P9BE-NEXT: mulli r4, r4, 23 ; P9BE-NEXT: subf r3, r4, r3 ; P9BE-NEXT: sldi r3, r3, 48 ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 +; P9BE-NEXT: lis r5, -14230 +; P9BE-NEXT: ori r5, r5, 30865 +; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: clrlwi r4, r3, 16 ; P9BE-NEXT: rlwinm r3, r3, 31, 17, 31 -; P9BE-NEXT: mulld r3, r3, r5 -; P9BE-NEXT: rldicl r3, r3, 24, 40 +; P9BE-NEXT: mulhwu r3, r3, r5 +; P9BE-NEXT: srwi r3, r3, 8 ; P9BE-NEXT: mulli r3, r3, 654 ; P9BE-NEXT: subf r3, r3, r4 ; P9BE-NEXT: sldi r3, r3, 48 ; P9BE-NEXT: mtvsrd v2, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: vmrghh v2, v4, v2 ; P9BE-NEXT: vmrghw v2, v2, v3 @@ -1063,35 +969,34 @@ ; P8LE-LABEL: dont_fold_urem_one: ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 -; P8LE-NEXT: li r3, 0 -; P8LE-NEXT: lis r8, 24749 +; P8LE-NEXT: lis r3, -19946 +; P8LE-NEXT: lis r7, 24749 +; P8LE-NEXT: lis r9, -14230 ; P8LE-NEXT: xxlxor v5, v5, v5 -; P8LE-NEXT: oris r5, r3, 45590 -; P8LE-NEXT: ori r8, r8, 47143 -; P8LE-NEXT: oris r3, r3, 51306 -; P8LE-NEXT: ori r5, r5, 17097 -; P8LE-NEXT: ori r3, r3, 30865 +; P8LE-NEXT: ori r3, r3, 17097 +; P8LE-NEXT: ori r7, r7, 47143 +; P8LE-NEXT: ori r9, r9, 30865 ; P8LE-NEXT: mffprd r4, f0 -; P8LE-NEXT: rldicl r6, r4, 32, 48 -; P8LE-NEXT: rldicl r7, r4, 16, 48 -; P8LE-NEXT: rlwinm r9, r6, 0, 16, 31 +; P8LE-NEXT: rldicl r5, r4, 32, 48 +; P8LE-NEXT: rldicl r6, r4, 16, 48 +; P8LE-NEXT: clrlwi r8, r5, 16 ; P8LE-NEXT: rldicl r4, r4, 48, 48 -; P8LE-NEXT: mulld r5, r9, r5 -; P8LE-NEXT: rlwinm r9, r7, 0, 16, 31 -; P8LE-NEXT: mulld r8, r9, r8 -; P8LE-NEXT: rlwinm r9, r4, 31, 17, 31 -; P8LE-NEXT: mulld r3, r9, r3 -; P8LE-NEXT: rldicl r5, r5, 28, 36 -; P8LE-NEXT: rldicl r8, r8, 21, 43 -; P8LE-NEXT: mulli r5, r5, 23 -; P8LE-NEXT: rldicl r3, r3, 24, 40 -; P8LE-NEXT: mulli r8, r8, 5423 -; P8LE-NEXT: mulli r3, r3, 654 -; P8LE-NEXT: subf r5, r5, r6 -; P8LE-NEXT: subf r6, r8, r7 -; P8LE-NEXT: mtfprd f0, r5 -; P8LE-NEXT: subf r3, r3, r4 -; P8LE-NEXT: mtfprd f1, r6 +; P8LE-NEXT: mulhwu r3, r8, r3 +; P8LE-NEXT: clrlwi r8, r6, 16 +; P8LE-NEXT: mulhwu r7, r8, r7 +; P8LE-NEXT: rlwinm r8, r4, 31, 17, 31 +; P8LE-NEXT: mulhwu r8, r8, r9 +; P8LE-NEXT: srwi r3, r3, 4 +; P8LE-NEXT: srwi r7, r7, 11 +; P8LE-NEXT: mulli r3, r3, 23 +; P8LE-NEXT: srwi r8, r8, 8 +; P8LE-NEXT: mulli r7, r7, 5423 +; P8LE-NEXT: mulli r8, r8, 654 +; P8LE-NEXT: subf r3, r3, r5 +; P8LE-NEXT: subf r5, r7, r6 +; P8LE-NEXT: mtfprd f0, r3 +; P8LE-NEXT: subf r3, r8, r4 +; P8LE-NEXT: mtfprd f1, r5 ; P8LE-NEXT: mtfprd f2, r3 ; P8LE-NEXT: xxswapd v2, vs0 ; P8LE-NEXT: xxswapd v3, vs1 @@ -1104,45 +1009,42 @@ ; P8BE-LABEL: dont_fold_urem_one: ; P8BE: # %bb.0: ; P8BE-NEXT: mfvsrd r4, v2 -; P8BE-NEXT: li r3, 0 -; P8BE-NEXT: lis r8, 24749 -; P8BE-NEXT: oris r6, r3, 51306 -; P8BE-NEXT: ori r8, r8, 47143 -; P8BE-NEXT: oris r3, r3, 45590 -; P8BE-NEXT: rldicl r5, r4, 32, 48 -; P8BE-NEXT: clrldi r7, r4, 48 -; P8BE-NEXT: ori r6, r6, 30865 -; P8BE-NEXT: ori r3, r3, 17097 -; P8BE-NEXT: rldicl r4, r4, 48, 48 -; P8BE-NEXT: rlwinm r9, r5, 31, 17, 31 -; P8BE-NEXT: clrlwi r7, r7, 16 +; P8BE-NEXT: lis r3, 24749 +; P8BE-NEXT: lis r7, -19946 +; P8BE-NEXT: lis r8, -14230 +; P8BE-NEXT: ori r3, r3, 47143 +; P8BE-NEXT: ori r7, r7, 17097 +; P8BE-NEXT: ori r8, r8, 30865 +; P8BE-NEXT: clrldi r5, r4, 48 +; P8BE-NEXT: rldicl r6, r4, 48, 48 +; P8BE-NEXT: rldicl r4, r4, 32, 48 ; P8BE-NEXT: clrlwi r5, r5, 16 +; P8BE-NEXT: clrlwi r6, r6, 16 +; P8BE-NEXT: mulhwu r3, r5, r3 +; P8BE-NEXT: rlwinm r9, r4, 31, 17, 31 ; P8BE-NEXT: clrlwi r4, r4, 16 -; P8BE-NEXT: mulld r6, r9, r6 -; P8BE-NEXT: clrldi r9, r7, 32 -; P8BE-NEXT: mulld r8, r9, r8 -; P8BE-NEXT: clrldi r9, r4, 32 -; P8BE-NEXT: mulld r3, r9, r3 +; P8BE-NEXT: mulhwu r7, r6, r7 +; P8BE-NEXT: mulhwu r8, r9, r8 ; P8BE-NEXT: li r9, 0 -; P8BE-NEXT: rldicl r6, r6, 24, 40 -; P8BE-NEXT: mulli r6, r6, 654 -; P8BE-NEXT: rldicl r8, r8, 21, 43 -; P8BE-NEXT: rldicl r3, r3, 28, 36 -; P8BE-NEXT: mulli r8, r8, 5423 -; P8BE-NEXT: mulli r3, r3, 23 -; P8BE-NEXT: subf r5, r6, r5 -; P8BE-NEXT: sldi r6, r9, 48 -; P8BE-NEXT: mtvsrd v2, r6 -; P8BE-NEXT: sldi r5, r5, 48 -; P8BE-NEXT: subf r6, r8, r7 -; P8BE-NEXT: mtvsrd v3, r5 -; P8BE-NEXT: subf r3, r3, r4 -; P8BE-NEXT: sldi r4, r6, 48 +; P8BE-NEXT: srwi r3, r3, 11 +; P8BE-NEXT: srwi r7, r7, 4 +; P8BE-NEXT: mulli r3, r3, 5423 +; P8BE-NEXT: srwi r8, r8, 8 +; P8BE-NEXT: mulli r7, r7, 23 +; P8BE-NEXT: mulli r8, r8, 654 +; P8BE-NEXT: subf r3, r3, r5 +; P8BE-NEXT: sldi r5, r9, 48 +; P8BE-NEXT: mtvsrd v2, r5 +; P8BE-NEXT: subf r5, r7, r6 ; P8BE-NEXT: sldi r3, r3, 48 -; P8BE-NEXT: mtvsrd v4, r4 +; P8BE-NEXT: subf r4, r8, r4 +; P8BE-NEXT: sldi r5, r5, 48 +; P8BE-NEXT: mtvsrd v3, r3 +; P8BE-NEXT: sldi r3, r4, 48 +; P8BE-NEXT: mtvsrd v4, r5 ; P8BE-NEXT: mtvsrd v5, r3 -; P8BE-NEXT: vmrghh v2, v2, v3 -; P8BE-NEXT: vmrghh v3, v5, v4 +; P8BE-NEXT: vmrghh v3, v4, v3 +; P8BE-NEXT: vmrghh v2, v2, v5 ; P8BE-NEXT: vmrghw v2, v2, v3 ; P8BE-NEXT: blr %1 = urem <4 x i16> %x,