diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp --- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp +++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp @@ -49,10 +49,103 @@ void PPCPreRASchedStrategy::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const { - GenericScheduler::tryCandidate(Cand, TryCand, Zone); + // From GenericScheduler::tryCandidate - if (!Cand.isValid() || !Zone) + // Initialize the candidate if needed. + if (!Cand.isValid()) { + TryCand.Reason = NodeOrder; return; + } + + // Bias PhysReg Defs and copies to their uses and defined respectively. + if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop), + biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg)) + return; + + // Avoid exceeding the target's limit. + if (DAG->isTrackingPressure() && + tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand, + RegExcess, TRI, DAG->MF)) + return; + + // Avoid increasing the max critical pressure in the scheduled region. + if (DAG->isTrackingPressure() && + tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax, + TryCand, Cand, RegCritical, TRI, DAG->MF)) + return; + + // We only compare a subset of features when comparing nodes between + // Top and Bottom boundary. Some properties are simply incomparable, in many + // other instances we should only override the other boundary if something + // is a clear good pick on one boundary. Skip heuristics that are more + // "tie-breaking" in nature. + bool SameBoundary = Zone != nullptr; + if (SameBoundary) { + // For loops that are acyclic path limited, aggressively schedule for + // latency. Within an single cycle, whenever CurrMOps > 0, allow normal + // heuristics to take precedence. + if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() && + tryLatency(TryCand, Cand, *Zone)) + return; + + // Prioritize instructions that read unbuffered resources by stall cycles. + if (tryLess(Zone->getLatencyStallCycles(TryCand.SU), + Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) + return; + } + + // Keep clustered nodes together to encourage downstream peephole + // optimizations which may reduce resource requirements. + // + // This is a best effort to set things up for a post-RA pass. Optimizations + // like generating loads of multiple registers should ideally be done within + // the scheduler pass by combining the loads during DAG postprocessing. + const SUnit *CandNextClusterSU = + Cand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); + const SUnit *TryCandNextClusterSU = + TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); + if (tryGreater(TryCand.SU == TryCandNextClusterSU, + Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster)) + return; + + if (SameBoundary) { + // Weak edges are for clustering and other constraints. + if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop), + getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak)) + return; + } + + // Avoid increasing the max pressure of the entire region. + if (DAG->isTrackingPressure() && + tryPressure(TryCand.RPDelta.CurrentMax, Cand.RPDelta.CurrentMax, TryCand, + Cand, RegMax, TRI, DAG->MF)) + return; + + if (SameBoundary) { + // Avoid critical resource consumption and balance the schedule. + TryCand.initResourceDelta(DAG, SchedModel); + if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, + TryCand, Cand, ResourceReduce)) + return; + if (tryGreater(TryCand.ResDelta.DemandedResources, + Cand.ResDelta.DemandedResources, TryCand, Cand, + ResourceDemand)) + return; + + // Avoid serializing long latency dependence chains. + // For acyclic path limited loops, latency was already checked above. + if (!RegionPolicy.DisableLatencyHeuristic && TryCand.Policy.ReduceLatency && + !Rem.IsAcyclicLatencyLimited && tryLatency(TryCand, Cand, *Zone)) + return; + + // Fall through to original instruction order. + if ((Zone->isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) || + (!Zone->isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { + TryCand.Reason = NodeOrder; + } + } + + // GenericScheduler::tryCandidate end // Add powerpc specific heuristic only when TryCand isn't selected or // selected as node order. @@ -61,8 +154,10 @@ // There are some benefits to schedule the ADDI before the load to hide the // latency, as RA may create a true dependency between the load and addi. - if (biasAddiLoadCandidate(Cand, TryCand, *Zone)) - return; + if (SameBoundary) { + if (biasAddiLoadCandidate(Cand, TryCand, *Zone)) + return; + } } bool PPCPostRASchedStrategy::biasAddiCandidate(SchedCandidate &Cand, @@ -79,11 +174,44 @@ void PPCPostRASchedStrategy::tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand) { - PostGenericScheduler::tryCandidate(Cand, TryCand); + // From PostGenericScheduler::tryCandidate + + // Initialize the candidate if needed. + if (!Cand.isValid()) { + TryCand.Reason = NodeOrder; + return; + } + + // Prioritize instructions that read unbuffered resources by stall cycles. + if (tryLess(Top.getLatencyStallCycles(TryCand.SU), + Top.getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) + return; - if (!Cand.isValid()) + // Keep clustered nodes together. + if (tryGreater(TryCand.SU == DAG->getNextClusterSucc(), + Cand.SU == DAG->getNextClusterSucc(), TryCand, Cand, Cluster)) return; + // Avoid critical resource consumption and balance the schedule. + if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, + TryCand, Cand, ResourceReduce)) + return; + if (tryGreater(TryCand.ResDelta.DemandedResources, + Cand.ResDelta.DemandedResources, TryCand, Cand, + ResourceDemand)) + return; + + // Avoid serializing long latency dependence chains. + if (Cand.Policy.ReduceLatency && tryLatency(TryCand, Cand, Top)) { + return; + } + + // Fall through to original instruction order. + if (TryCand.SU->NodeNum < Cand.SU->NodeNum) + TryCand.Reason = NodeOrder; + + // PostGenericScheduler::tryCandidate end + // Add powerpc post ra specific heuristic only when TryCand isn't selected or // selected as node order. if (TryCand.Reason != NodeOrder && TryCand.Reason != NoCand) diff --git a/llvm/test/CodeGen/PowerPC/botheightreduce.mir b/llvm/test/CodeGen/PowerPC/botheightreduce.mir --- a/llvm/test/CodeGen/PowerPC/botheightreduce.mir +++ b/llvm/test/CodeGen/PowerPC/botheightreduce.mir @@ -26,7 +26,6 @@ ; CHECK: [[LI8_6:%[0-9]+]]:g8rc = LI8 7 ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) - ; CHECK: [[ADDI8_1:%[0-9]+]]:g8rc = ADDI8 [[ADDI8_]], 1 ; CHECK: [[LD:%[0-9]+]]:g8rc = LD 0, [[ADDI8_]] :: (load 8) ; CHECK: [[LDX:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_]] :: (load 8) ; CHECK: [[LDX1:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_3]] :: (load 8) @@ -34,9 +33,10 @@ ; CHECK: [[LDX2:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_4]] :: (load 8) ; CHECK: [[LDX3:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_5]] :: (load 8) ; CHECK: [[LDX4:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_6]] :: (load 8) - ; CHECK: [[LDX5:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_2]] :: (load 8) - ; CHECK: [[MULLD:%[0-9]+]]:g8rc = MULLD [[LDX]], [[LD]] ; CHECK: [[LD2:%[0-9]+]]:g8rc = LD 8, [[ADDI8_]] :: (load 8) + ; CHECK: [[MULLD:%[0-9]+]]:g8rc = MULLD [[LDX]], [[LD]] + ; CHECK: [[LDX5:%[0-9]+]]:g8rc = LDX [[ADDI8_]], [[LI8_2]] :: (load 8) + ; CHECK: [[ADDI8_1:%[0-9]+]]:g8rc = ADDI8 [[ADDI8_]], 1 ; CHECK: [[MULLD1:%[0-9]+]]:g8rc = MULLD [[MULLD]], [[LDX5]] ; CHECK: [[MULLD2:%[0-9]+]]:g8rc = MULLD [[MULLD1]], [[LDX1]] ; CHECK: [[MULLD3:%[0-9]+]]:g8rc = MULLD [[MULLD2]], [[LD1]] diff --git a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll --- a/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll +++ b/llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll @@ -116,15 +116,14 @@ ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB1_2: -; CHECK-NEXT: ldx r9, r6, r7 -; CHECK-NEXT: ld r10, 0(r6) -; CHECK-NEXT: ldx r11, r6, r5 -; CHECK-NEXT: addi r8, r6, 1 -; CHECK-NEXT: ld r6, 4(r6) -; CHECK-NEXT: mulld r9, r10, r9 -; CHECK-NEXT: mulld r9, r9, r11 -; CHECK-NEXT: maddld r3, r9, r6, r3 -; CHECK-NEXT: mr r6, r8 +; CHECK-NEXT: ldx r8, r6, r7 +; CHECK-NEXT: ld r9, 0(r6) +; CHECK-NEXT: ldx r10, r6, r5 +; CHECK-NEXT: ld r11, 4(r6) +; CHECK-NEXT: addi r6, r6, 1 +; CHECK-NEXT: mulld r8, r9, r8 +; CHECK-NEXT: mulld r8, r8, r10 +; CHECK-NEXT: maddld r3, r8, r11, r3 ; CHECK-NEXT: bdnz .LBB1_2 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: add r3, r3, r4 @@ -217,25 +216,24 @@ ; CHECK-NEXT: li r3, 0 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB2_2: -; CHECK-NEXT: ldx r12, r9, r6 -; CHECK-NEXT: ld r0, 0(r9) -; CHECK-NEXT: ldx r30, r9, r5 -; CHECK-NEXT: ldx r29, r9, r7 -; CHECK-NEXT: addi r11, r9, 1 -; CHECK-NEXT: mulld r12, r0, r12 -; CHECK-NEXT: ld r28, 4(r9) -; CHECK-NEXT: ldx r27, r9, r8 -; CHECK-NEXT: ld r26, 12(r9) -; CHECK-NEXT: ld r25, 8(r9) -; CHECK-NEXT: ldx r9, r9, r10 -; CHECK-NEXT: mulld r12, r12, r30 -; CHECK-NEXT: mulld r12, r12, r29 -; CHECK-NEXT: mulld r12, r12, r28 -; CHECK-NEXT: mulld r12, r12, r27 -; CHECK-NEXT: mulld r12, r12, r26 -; CHECK-NEXT: mulld r12, r12, r25 -; CHECK-NEXT: maddld r3, r12, r9, r3 -; CHECK-NEXT: mr r9, r11 +; CHECK-NEXT: ldx r11, r9, r6 +; CHECK-NEXT: ld r12, 0(r9) +; CHECK-NEXT: ldx r0, r9, r5 +; CHECK-NEXT: ldx r30, r9, r7 +; CHECK-NEXT: mulld r11, r12, r11 +; CHECK-NEXT: ld r29, 4(r9) +; CHECK-NEXT: ldx r28, r9, r8 +; CHECK-NEXT: ld r27, 12(r9) +; CHECK-NEXT: ld r26, 8(r9) +; CHECK-NEXT: ldx r25, r9, r10 +; CHECK-NEXT: addi r9, r9, 1 +; CHECK-NEXT: mulld r11, r11, r0 +; CHECK-NEXT: mulld r11, r11, r30 +; CHECK-NEXT: mulld r11, r11, r29 +; CHECK-NEXT: mulld r11, r11, r28 +; CHECK-NEXT: mulld r11, r11, r27 +; CHECK-NEXT: mulld r11, r11, r26 +; CHECK-NEXT: maddld r3, r11, r25, r3 ; CHECK-NEXT: bdnz .LBB2_2 ; CHECK-NEXT: b .LBB2_4 ; CHECK-NEXT: .LBB2_3: @@ -624,10 +622,10 @@ ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: beq cr0, .LBB6_8 ; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: addis r5, r2, .LC0@toc@ha ; CHECK-NEXT: cmpldi r4, 1 ; CHECK-NEXT: li r7, 1 ; CHECK-NEXT: addi r6, r3, 4009 -; CHECK-NEXT: addis r5, r2, .LC0@toc@ha ; CHECK-NEXT: ld r5, .LC0@toc@l(r5) ; CHECK-NEXT: iselgt r8, r4, r7 ; CHECK-NEXT: lis r4, -21846 @@ -639,11 +637,11 @@ ; CHECK-NEXT: li r30, 1 ; CHECK-NEXT: ld r5, 0(r5) ; CHECK-NEXT: mtctr r8 -; CHECK-NEXT: li r8, -9 -; CHECK-NEXT: addi r5, r5, -1 ; CHECK-NEXT: ori r4, r4, 43691 +; CHECK-NEXT: li r8, -9 ; CHECK-NEXT: li r29, 1 ; CHECK-NEXT: li r28, 1 +; CHECK-NEXT: addi r5, r5, -1 ; CHECK-NEXT: b .LBB6_4 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB6_2: @@ -652,8 +650,8 @@ ; CHECK-NEXT: ld r0, -8(r6) ; CHECK-NEXT: add r29, r0, r29 ; CHECK-NEXT: .LBB6_3: -; CHECK-NEXT: addi r6, r6, 1 ; CHECK-NEXT: mulld r0, r29, r28 +; CHECK-NEXT: addi r6, r6, 1 ; CHECK-NEXT: mulld r0, r0, r30 ; CHECK-NEXT: mulld r0, r0, r12 ; CHECK-NEXT: mulld r0, r0, r11 @@ -802,8 +800,8 @@ ; CHECK-NEXT: cmpwi r4, 1 ; CHECK-NEXT: blt cr0, .LBB7_4 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: addi r3, r3, 4002 ; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: addi r3, r3, 4002 ; CHECK-NEXT: xxlxor f1, f1, f1 ; CHECK-NEXT: mtctr r4 ; CHECK-NEXT: li r4, -1 @@ -884,8 +882,8 @@ ; CHECK-NEXT: cmpwi r4, 1 ; CHECK-NEXT: blt cr0, .LBB8_4 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: addi r3, r3, 4002 ; CHECK-NEXT: clrldi r4, r4, 32 +; CHECK-NEXT: addi r3, r3, 4002 ; CHECK-NEXT: xxlxor f1, f1, f1 ; CHECK-NEXT: mtctr r4 ; CHECK-NEXT: li r4, -1 diff --git a/llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll b/llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll --- a/llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll +++ b/llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll @@ -14,9 +14,9 @@ define void @foo(float* nocapture %data, float %d) { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: xscvdpspn 0, 1 ; CHECK-NEXT: li 5, 83 ; CHECK-NEXT: addi 4, 3, 192 -; CHECK-NEXT: xscvdpspn 0, 1 ; CHECK-NEXT: mtctr 5 ; CHECK-NEXT: xxspltw 0, 0, 0 ; CHECK-NEXT: .p2align 4 diff --git a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/mma-intrinsics.ll @@ -400,9 +400,9 @@ ; CHECK-NEXT: .LBB9_2: # %for.body ; CHECK-NEXT: # ; CHECK-NEXT: rldic r7, r6, 4, 28 -; CHECK-NEXT: addi r6, r6, 6 ; CHECK-NEXT: xxsetaccz acc2 ; CHECK-NEXT: xxsetaccz acc1 +; CHECK-NEXT: addi r6, r6, 6 ; CHECK-NEXT: lxvx vs0, r5, r7 ; CHECK-NEXT: add r7, r5, r7 ; CHECK-NEXT: lxv vs1, 16(r7) @@ -414,8 +414,8 @@ ; CHECK-NEXT: lxv vs12, 64(r7) ; CHECK-NEXT: lxv vs13, 80(r7) ; CHECK-NEXT: rldic r7, r4, 6, 26 -; CHECK-NEXT: addi r4, r4, 3 ; CHECK-NEXT: xxsetaccz acc0 +; CHECK-NEXT: addi r4, r4, 3 ; CHECK-NEXT: xxmfacc acc1 ; CHECK-NEXT: xvf32gernp acc0, vs12, vs13 ; CHECK-NEXT: stxvx vs11, r3, r7 @@ -449,9 +449,9 @@ ; CHECK-BE-NEXT: .LBB9_2: # %for.body ; CHECK-BE-NEXT: # ; CHECK-BE-NEXT: rldic r7, r6, 4, 28 -; CHECK-BE-NEXT: addi r6, r6, 6 ; CHECK-BE-NEXT: xxsetaccz acc2 ; CHECK-BE-NEXT: xxsetaccz acc1 +; CHECK-BE-NEXT: addi r6, r6, 6 ; CHECK-BE-NEXT: lxvx vs0, r5, r7 ; CHECK-BE-NEXT: add r7, r5, r7 ; CHECK-BE-NEXT: lxv vs1, 16(r7) @@ -463,8 +463,8 @@ ; CHECK-BE-NEXT: lxv vs12, 64(r7) ; CHECK-BE-NEXT: lxv vs13, 80(r7) ; CHECK-BE-NEXT: rldic r7, r4, 6, 26 -; CHECK-BE-NEXT: addi r4, r4, 3 ; CHECK-BE-NEXT: xxsetaccz acc0 +; CHECK-BE-NEXT: addi r4, r4, 3 ; CHECK-BE-NEXT: xxmfacc acc1 ; CHECK-BE-NEXT: xvf32gernp acc0, vs12, vs13 ; CHECK-BE-NEXT: stxvx vs8, r3, r7 @@ -544,8 +544,7 @@ declare i32 @testRedundantPrimeUnprimeF() define void @testRedundantPrimeUnprime(<512 x i1>* %dst, <16 x i8> %vc) nounwind { ; CHECK-LABEL: testRedundantPrimeUnprime: -; CHECK: .localentry testRedundantPrimeUnprime, 1 -; CHECK-NEXT: # %bb.0: # %entry +; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mflr r0 ; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-NEXT: std r0, 16(r1) diff --git a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll --- a/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll +++ b/llvm/test/CodeGen/PowerPC/mma-phi-accs.ll @@ -17,17 +17,17 @@ ; CHECK-NEXT: xxsetaccz acc0 ; CHECK-NEXT: blt cr0, .LBB0_3 ; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: clrldi r6, r5, 32 -; CHECK-NEXT: addi r5, r4, 32 -; CHECK-NEXT: addi r6, r6, -2 +; CHECK-NEXT: clrldi r5, r5, 32 ; CHECK-NEXT: lxv vs4, 0(r4) ; CHECK-NEXT: lxv vs5, 16(r4) -; CHECK-NEXT: mtctr r6 +; CHECK-NEXT: addi r4, r4, 32 +; CHECK-NEXT: addi r5, r5, -2 +; CHECK-NEXT: mtctr r5 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: # %for.body ; CHECK-NEXT: # -; CHECK-NEXT: lxv vs6, 0(r5) -; CHECK-NEXT: addi r5, r5, 16 +; CHECK-NEXT: lxv vs6, 0(r4) +; CHECK-NEXT: addi r4, r4, 16 ; CHECK-NEXT: xvf64gerpp acc0, vsp4, vs6 ; CHECK-NEXT: bdnz .LBB0_2 ; CHECK-NEXT: .LBB0_3: # %for.cond.cleanup @@ -44,17 +44,17 @@ ; CHECK-BE-NEXT: xxsetaccz acc0 ; CHECK-BE-NEXT: blt cr0, .LBB0_3 ; CHECK-BE-NEXT: # %bb.1: # %for.body.preheader -; CHECK-BE-NEXT: clrldi r6, r5, 32 -; CHECK-BE-NEXT: addi r5, r4, 32 -; CHECK-BE-NEXT: addi r6, r6, -2 +; CHECK-BE-NEXT: clrldi r5, r5, 32 ; CHECK-BE-NEXT: lxv vs4, 0(r4) ; CHECK-BE-NEXT: lxv vs5, 16(r4) -; CHECK-BE-NEXT: mtctr r6 +; CHECK-BE-NEXT: addi r4, r4, 32 +; CHECK-BE-NEXT: addi r5, r5, -2 +; CHECK-BE-NEXT: mtctr r5 ; CHECK-BE-NEXT: .p2align 4 ; CHECK-BE-NEXT: .LBB0_2: # %for.body ; CHECK-BE-NEXT: # -; CHECK-BE-NEXT: lxv vs6, 0(r5) -; CHECK-BE-NEXT: addi r5, r5, 16 +; CHECK-BE-NEXT: lxv vs6, 0(r4) +; CHECK-BE-NEXT: addi r4, r4, 16 ; CHECK-BE-NEXT: xvf64gerpp acc0, vsp4, vs6 ; CHECK-BE-NEXT: bdnz .LBB0_2 ; CHECK-BE-NEXT: .LBB0_3: # %for.cond.cleanup @@ -115,8 +115,8 @@ ; CHECK-NEXT: xvf64ger acc0, vsp4, vs6 ; CHECK-NEXT: blt cr0, .LBB1_3 ; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: addi r4, r4, 48 ; CHECK-NEXT: clrldi r5, r5, 32 +; CHECK-NEXT: addi r4, r4, 48 ; CHECK-NEXT: addi r5, r5, -3 ; CHECK-NEXT: mtctr r5 ; CHECK-NEXT: .p2align 4 @@ -143,8 +143,8 @@ ; CHECK-BE-NEXT: xvf64ger acc0, vsp4, vs6 ; CHECK-BE-NEXT: blt cr0, .LBB1_3 ; CHECK-BE-NEXT: # %bb.1: # %for.body.preheader -; CHECK-BE-NEXT: addi r4, r4, 48 ; CHECK-BE-NEXT: clrldi r5, r5, 32 +; CHECK-BE-NEXT: addi r4, r4, 48 ; CHECK-BE-NEXT: addi r5, r5, -3 ; CHECK-BE-NEXT: mtctr r5 ; CHECK-BE-NEXT: .p2align 4 diff --git a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll --- a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll +++ b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll @@ -9,10 +9,9 @@ define void @foo(i32* %.m, i32* %.n, [0 x %_elem_type_of_a]* %.a, [0 x %_elem_type_of_x]* %.x, i32* %.l, <2 x double>* %.vy01, <2 x double>* %.vy02, <2 x double>* %.vy03, <2 x double>* %.vy04, <2 x double>* %.vy05, <2 x double>* %.vy06, <2 x double>* %.vy07, <2 x double>* %.vy08, <2 x double>* %.vy09, <2 x double>* %.vy0a, <2 x double>* %.vy0b, <2 x double>* %.vy0c, <2 x double>* %.vy21, <2 x double>* %.vy22, <2 x double>* %.vy23, <2 x double>* %.vy24, <2 x double>* %.vy25, <2 x double>* %.vy26, <2 x double>* %.vy27, <2 x double>* %.vy28, <2 x double>* %.vy29, <2 x double>* %.vy2a, <2 x double>* %.vy2b, <2 x double>* %.vy2c) { ; CHECK-LABEL: foo: -; CHECK: .localentry foo, 1 -; CHECK-NEXT: # %bb.0: # %entry -; CHECK-NEXT: stdu 1, -448(1) -; CHECK-NEXT: .cfi_def_cfa_offset 448 +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: stdu 1, -480(1) +; CHECK-NEXT: .cfi_def_cfa_offset 480 ; CHECK-NEXT: .cfi_offset r14, -256 ; CHECK-NEXT: .cfi_offset r15, -248 ; CHECK-NEXT: .cfi_offset r16, -240 @@ -46,308 +45,318 @@ ; CHECK-NEXT: .cfi_offset f30, -16 ; CHECK-NEXT: .cfi_offset f31, -8 ; CHECK-NEXT: lwz 4, 0(4) -; CHECK-NEXT: std 14, 192(1) # 8-byte Folded Spill -; CHECK-NEXT: std 15, 200(1) # 8-byte Folded Spill +; CHECK-NEXT: std 14, 224(1) # 8-byte Folded Spill +; CHECK-NEXT: std 15, 232(1) # 8-byte Folded Spill ; CHECK-NEXT: cmpwi 4, 1 -; CHECK-NEXT: std 16, 208(1) # 8-byte Folded Spill -; CHECK-NEXT: std 17, 216(1) # 8-byte Folded Spill -; CHECK-NEXT: std 18, 224(1) # 8-byte Folded Spill -; CHECK-NEXT: std 19, 232(1) # 8-byte Folded Spill -; CHECK-NEXT: std 20, 240(1) # 8-byte Folded Spill -; CHECK-NEXT: std 21, 248(1) # 8-byte Folded Spill -; CHECK-NEXT: std 22, 256(1) # 8-byte Folded Spill -; CHECK-NEXT: std 23, 264(1) # 8-byte Folded Spill -; CHECK-NEXT: std 24, 272(1) # 8-byte Folded Spill -; CHECK-NEXT: std 25, 280(1) # 8-byte Folded Spill -; CHECK-NEXT: std 26, 288(1) # 8-byte Folded Spill -; CHECK-NEXT: std 27, 296(1) # 8-byte Folded Spill -; CHECK-NEXT: std 28, 304(1) # 8-byte Folded Spill -; CHECK-NEXT: std 29, 312(1) # 8-byte Folded Spill -; CHECK-NEXT: std 30, 320(1) # 8-byte Folded Spill -; CHECK-NEXT: std 31, 328(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 18, 336(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 19, 344(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 20, 352(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 21, 360(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 22, 368(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 23, 376(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 24, 384(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 25, 392(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 26, 400(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 27, 408(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 28, 416(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 29, 424(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 30, 432(1) # 8-byte Folded Spill -; CHECK-NEXT: stfd 31, 440(1) # 8-byte Folded Spill +; CHECK-NEXT: std 16, 240(1) # 8-byte Folded Spill +; CHECK-NEXT: std 17, 248(1) # 8-byte Folded Spill +; CHECK-NEXT: std 18, 256(1) # 8-byte Folded Spill +; CHECK-NEXT: std 19, 264(1) # 8-byte Folded Spill +; CHECK-NEXT: std 20, 272(1) # 8-byte Folded Spill +; CHECK-NEXT: std 21, 280(1) # 8-byte Folded Spill +; CHECK-NEXT: std 22, 288(1) # 8-byte Folded Spill +; CHECK-NEXT: std 23, 296(1) # 8-byte Folded Spill +; CHECK-NEXT: std 24, 304(1) # 8-byte Folded Spill +; CHECK-NEXT: std 25, 312(1) # 8-byte Folded Spill +; CHECK-NEXT: std 26, 320(1) # 8-byte Folded Spill +; CHECK-NEXT: std 27, 328(1) # 8-byte Folded Spill +; CHECK-NEXT: std 28, 336(1) # 8-byte Folded Spill +; CHECK-NEXT: std 29, 344(1) # 8-byte Folded Spill +; CHECK-NEXT: std 30, 352(1) # 8-byte Folded Spill +; CHECK-NEXT: std 31, 360(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 18, 368(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 19, 376(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 20, 384(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 21, 392(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 22, 400(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 23, 408(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 24, 416(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 25, 424(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 26, 432(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 27, 440(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 28, 448(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 29, 456(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 30, 464(1) # 8-byte Folded Spill +; CHECK-NEXT: stfd 31, 472(1) # 8-byte Folded Spill ; CHECK-NEXT: blt 0, .LBB0_7 ; CHECK-NEXT: # %bb.1: # %_loop_1_do_.lr.ph -; CHECK-NEXT: lwz 3, 0(3) -; CHECK-NEXT: cmpwi 3, 1 +; CHECK-NEXT: mr 23, 5 +; CHECK-NEXT: lwz 5, 0(3) +; CHECK-NEXT: cmpwi 5, 1 ; CHECK-NEXT: blt 0, .LBB0_7 ; CHECK-NEXT: # %bb.2: # %_loop_1_do_.preheader -; CHECK-NEXT: addi 3, 3, 1 -; CHECK-NEXT: mr 24, 5 -; CHECK-NEXT: li 5, 9 -; CHECK-NEXT: mr 11, 7 -; CHECK-NEXT: ld 12, 640(1) -; CHECK-NEXT: std 9, 176(1) # 8-byte Folded Spill -; CHECK-NEXT: std 10, 184(1) # 8-byte Folded Spill -; CHECK-NEXT: mr 7, 6 -; CHECK-NEXT: ld 6, 544(1) -; CHECK-NEXT: lxv 1, 0(9) -; CHECK-NEXT: ld 9, 648(1) -; CHECK-NEXT: ld 29, 688(1) -; CHECK-NEXT: ld 28, 680(1) -; CHECK-NEXT: ld 2, 632(1) -; CHECK-NEXT: ld 26, 624(1) -; CHECK-NEXT: lxv 0, 0(10) -; CHECK-NEXT: cmpldi 3, 9 -; CHECK-NEXT: lxv 4, 0(8) -; CHECK-NEXT: ld 30, 664(1) -; CHECK-NEXT: ld 10, 704(1) -; CHECK-NEXT: ld 27, 672(1) -; CHECK-NEXT: ld 25, 616(1) -; CHECK-NEXT: ld 23, 608(1) -; CHECK-NEXT: ld 22, 600(1) -; CHECK-NEXT: ld 21, 592(1) -; CHECK-NEXT: ld 19, 584(1) -; CHECK-NEXT: ld 17, 576(1) -; CHECK-NEXT: iselgt 3, 3, 5 -; CHECK-NEXT: ld 5, 656(1) -; CHECK-NEXT: addi 3, 3, -2 -; CHECK-NEXT: lwa 20, 0(11) -; CHECK-NEXT: lxv 13, 0(12) -; CHECK-NEXT: std 6, 128(1) # 8-byte Folded Spill -; CHECK-NEXT: std 27, 136(1) # 8-byte Folded Spill -; CHECK-NEXT: lxv 2, 0(6) -; CHECK-NEXT: ld 6, 696(1) -; CHECK-NEXT: lxv 34, 0(2) -; CHECK-NEXT: lxv 7, 0(29) -; CHECK-NEXT: lxv 39, 0(17) -; CHECK-NEXT: lxv 38, 0(19) -; CHECK-NEXT: lxv 33, 0(21) -; CHECK-NEXT: lxv 32, 0(22) -; CHECK-NEXT: lxv 37, 0(23) -; CHECK-NEXT: lxv 36, 0(25) -; CHECK-NEXT: lxv 35, 0(26) -; CHECK-NEXT: lxv 11, 0(9) -; CHECK-NEXT: lxv 12, 0(30) -; CHECK-NEXT: rldicl 3, 3, 61, 3 -; CHECK-NEXT: addi 0, 3, 1 -; CHECK-NEXT: ld 3, 560(1) -; CHECK-NEXT: sldi 11, 20, 2 -; CHECK-NEXT: lxv 9, 0(5) -; CHECK-NEXT: lxv 10, 0(27) -; CHECK-NEXT: lxv 8, 0(28) -; CHECK-NEXT: lxv 6, 0(6) -; CHECK-NEXT: lxv 5, 0(10) -; CHECK-NEXT: lxv 3, 0(3) -; CHECK-NEXT: std 3, 96(1) # 8-byte Folded Spill -; CHECK-NEXT: std 12, 104(1) # 8-byte Folded Spill -; CHECK-NEXT: sldi 3, 20, 4 -; CHECK-NEXT: add 12, 20, 11 -; CHECK-NEXT: std 8, 168(1) # 8-byte Folded Spill -; CHECK-NEXT: std 6, 160(1) # 8-byte Folded Spill -; CHECK-NEXT: ld 8, 552(1) -; CHECK-NEXT: sldi 18, 20, 1 +; CHECK-NEXT: addi 5, 5, 1 +; CHECK-NEXT: li 20, 9 +; CHECK-NEXT: ld 28, 728(1) +; CHECK-NEXT: ld 19, 616(1) +; CHECK-NEXT: lwa 3, 0(7) +; CHECK-NEXT: ld 7, 688(1) +; CHECK-NEXT: ld 12, 680(1) +; CHECK-NEXT: ld 11, 672(1) +; CHECK-NEXT: ld 2, 664(1) +; CHECK-NEXT: ld 29, 736(1) +; CHECK-NEXT: cmpldi 5, 9 +; CHECK-NEXT: ld 27, 720(1) +; CHECK-NEXT: ld 26, 712(1) +; CHECK-NEXT: ld 25, 704(1) +; CHECK-NEXT: ld 24, 696(1) +; CHECK-NEXT: iselgt 5, 5, 20 +; CHECK-NEXT: ld 30, 656(1) +; CHECK-NEXT: ld 22, 648(1) +; CHECK-NEXT: ld 21, 640(1) +; CHECK-NEXT: ld 20, 632(1) +; CHECK-NEXT: ld 18, 608(1) +; CHECK-NEXT: ld 17, 600(1) +; CHECK-NEXT: ld 16, 592(1) +; CHECK-NEXT: ld 14, 584(1) +; CHECK-NEXT: sldi 0, 3, 2 +; CHECK-NEXT: std 5, 216(1) # 8-byte Folded Spill +; CHECK-NEXT: std 28, 208(1) # 8-byte Folded Spill +; CHECK-NEXT: mr 5, 4 +; CHECK-NEXT: ld 4, 624(1) +; CHECK-NEXT: std 19, 96(1) # 8-byte Folded Spill +; CHECK-NEXT: std 4, 104(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 11, 0(4) +; CHECK-NEXT: mr 4, 5 +; CHECK-NEXT: ld 5, 216(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 15, 576(1) +; CHECK-NEXT: sldi 31, 3, 1 +; CHECK-NEXT: std 8, 32(1) # 8-byte Folded Spill +; CHECK-NEXT: std 9, 40(1) # 8-byte Folded Spill ; CHECK-NEXT: lxv 41, 0(8) -; CHECK-NEXT: add 3, 3, 24 -; CHECK-NEXT: addi 16, 3, 32 -; CHECK-NEXT: sldi 3, 20, 3 -; CHECK-NEXT: std 9, 112(1) # 8-byte Folded Spill -; CHECK-NEXT: std 5, 120(1) # 8-byte Folded Spill -; CHECK-NEXT: sldi 5, 12, 3 -; CHECK-NEXT: std 26, 80(1) # 8-byte Folded Spill -; CHECK-NEXT: std 2, 88(1) # 8-byte Folded Spill -; CHECK-NEXT: add 2, 24, 5 -; CHECK-NEXT: mr 9, 30 -; CHECK-NEXT: li 26, 1 -; CHECK-NEXT: add 3, 3, 24 -; CHECK-NEXT: addi 31, 3, 32 -; CHECK-NEXT: ld 3, 568(1) -; CHECK-NEXT: std 28, 144(1) # 8-byte Folded Spill -; CHECK-NEXT: std 29, 152(1) # 8-byte Folded Spill -; CHECK-NEXT: sldi 5, 20, 5 -; CHECK-NEXT: add 29, 20, 18 -; CHECK-NEXT: std 23, 64(1) # 8-byte Folded Spill -; CHECK-NEXT: std 25, 72(1) # 8-byte Folded Spill -; CHECK-NEXT: mulli 27, 20, 48 -; CHECK-NEXT: add 30, 24, 5 -; CHECK-NEXT: li 25, 0 -; CHECK-NEXT: lxv 40, 0(3) -; CHECK-NEXT: mulli 23, 20, 6 -; CHECK-NEXT: sldi 5, 29, 3 -; CHECK-NEXT: add 28, 24, 5 -; CHECK-NEXT: mr 5, 24 -; CHECK-NEXT: std 17, 32(1) # 8-byte Folded Spill -; CHECK-NEXT: std 19, 40(1) # 8-byte Folded Spill -; CHECK-NEXT: std 21, 48(1) # 8-byte Folded Spill -; CHECK-NEXT: std 22, 56(1) # 8-byte Folded Spill +; CHECK-NEXT: mr 8, 6 +; CHECK-NEXT: sldi 6, 3, 3 +; CHECK-NEXT: std 2, 144(1) # 8-byte Folded Spill +; CHECK-NEXT: std 11, 152(1) # 8-byte Folded Spill +; CHECK-NEXT: lxv 3, 0(2) +; CHECK-NEXT: lxv 2, 0(11) +; CHECK-NEXT: lxv 0, 0(7) +; CHECK-NEXT: add 6, 6, 23 +; CHECK-NEXT: lxv 7, 0(28) +; CHECK-NEXT: add 28, 3, 31 +; CHECK-NEXT: lxv 40, 0(9) +; CHECK-NEXT: lxv 39, 0(10) +; CHECK-NEXT: lxv 38, 0(15) +; CHECK-NEXT: lxv 33, 0(14) +; CHECK-NEXT: lxv 32, 0(16) +; CHECK-NEXT: lxv 37, 0(17) +; CHECK-NEXT: lxv 35, 0(18) +; CHECK-NEXT: lxv 13, 0(19) +; CHECK-NEXT: lxv 10, 0(20) +; CHECK-NEXT: lxv 8, 0(21) +; CHECK-NEXT: lxv 6, 0(22) +; CHECK-NEXT: lxv 4, 0(30) +; CHECK-NEXT: lxv 1, 0(12) +; CHECK-NEXT: lxv 36, 0(24) +; CHECK-NEXT: lxv 34, 0(25) +; CHECK-NEXT: lxv 12, 0(26) +; CHECK-NEXT: lxv 9, 0(27) +; CHECK-NEXT: lxv 5, 0(29) +; CHECK-NEXT: addi 5, 5, -2 +; CHECK-NEXT: sldi 11, 3, 4 +; CHECK-NEXT: std 12, 160(1) # 8-byte Folded Spill +; CHECK-NEXT: std 7, 168(1) # 8-byte Folded Spill +; CHECK-NEXT: add 7, 3, 0 +; CHECK-NEXT: add 12, 11, 23 +; CHECK-NEXT: addi 11, 6, 32 +; CHECK-NEXT: addi 12, 12, 32 +; CHECK-NEXT: std 22, 128(1) # 8-byte Folded Spill +; CHECK-NEXT: std 30, 136(1) # 8-byte Folded Spill +; CHECK-NEXT: std 26, 192(1) # 8-byte Folded Spill +; CHECK-NEXT: std 27, 200(1) # 8-byte Folded Spill +; CHECK-NEXT: mulli 26, 3, 48 +; CHECK-NEXT: mulli 22, 3, 6 +; CHECK-NEXT: sldi 6, 7, 3 +; CHECK-NEXT: add 30, 23, 6 +; CHECK-NEXT: std 29, 216(1) # 8-byte Folded Spill +; CHECK-NEXT: std 24, 176(1) # 8-byte Folded Spill +; CHECK-NEXT: std 25, 184(1) # 8-byte Folded Spill +; CHECK-NEXT: li 25, 1 +; CHECK-NEXT: li 24, 0 +; CHECK-NEXT: std 10, 48(1) # 8-byte Folded Spill +; CHECK-NEXT: std 15, 56(1) # 8-byte Folded Spill +; CHECK-NEXT: std 14, 64(1) # 8-byte Folded Spill +; CHECK-NEXT: std 16, 72(1) # 8-byte Folded Spill +; CHECK-NEXT: std 17, 80(1) # 8-byte Folded Spill +; CHECK-NEXT: std 18, 88(1) # 8-byte Folded Spill +; CHECK-NEXT: std 20, 112(1) # 8-byte Folded Spill +; CHECK-NEXT: std 21, 120(1) # 8-byte Folded Spill +; CHECK-NEXT: rldicl 5, 5, 61, 3 +; CHECK-NEXT: addi 2, 5, 1 +; CHECK-NEXT: sldi 5, 3, 5 +; CHECK-NEXT: add 29, 23, 5 +; CHECK-NEXT: sldi 5, 28, 3 +; CHECK-NEXT: add 27, 23, 5 +; CHECK-NEXT: mr 5, 23 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_3: # %_loop_2_do_.lr.ph ; CHECK-NEXT: # =>This Loop Header: Depth=1 ; CHECK-NEXT: # Child Loop BB0_4 Depth 2 -; CHECK-NEXT: maddld 6, 23, 25, 12 -; CHECK-NEXT: maddld 21, 23, 25, 11 -; CHECK-NEXT: mtctr 0 +; CHECK-NEXT: maddld 6, 22, 24, 7 +; CHECK-NEXT: maddld 20, 22, 24, 0 +; CHECK-NEXT: mtctr 2 ; CHECK-NEXT: sldi 6, 6, 3 -; CHECK-NEXT: add 22, 24, 6 -; CHECK-NEXT: sldi 6, 21, 3 -; CHECK-NEXT: add 21, 24, 6 -; CHECK-NEXT: maddld 6, 23, 25, 29 +; CHECK-NEXT: add 21, 23, 6 +; CHECK-NEXT: sldi 6, 20, 3 +; CHECK-NEXT: add 20, 23, 6 +; CHECK-NEXT: maddld 6, 22, 24, 28 ; CHECK-NEXT: sldi 6, 6, 3 -; CHECK-NEXT: add 19, 24, 6 -; CHECK-NEXT: maddld 6, 23, 25, 18 +; CHECK-NEXT: add 19, 23, 6 +; CHECK-NEXT: maddld 6, 22, 24, 31 ; CHECK-NEXT: sldi 6, 6, 3 -; CHECK-NEXT: add 17, 24, 6 -; CHECK-NEXT: maddld 6, 23, 25, 20 +; CHECK-NEXT: add 18, 23, 6 +; CHECK-NEXT: maddld 6, 22, 24, 3 ; CHECK-NEXT: sldi 6, 6, 3 -; CHECK-NEXT: add 15, 24, 6 -; CHECK-NEXT: mulld 6, 23, 25 +; CHECK-NEXT: add 17, 23, 6 +; CHECK-NEXT: mulld 6, 22, 24 ; CHECK-NEXT: sldi 6, 6, 3 -; CHECK-NEXT: add 14, 24, 6 -; CHECK-NEXT: mr 6, 7 +; CHECK-NEXT: add 16, 23, 6 +; CHECK-NEXT: mr 6, 8 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_4: # %_loop_2_do_ ; CHECK-NEXT: # Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: # => This Inner Loop Header: Depth=2 ; CHECK-NEXT: lxvp 42, 0(6) -; CHECK-NEXT: lxvp 44, 0(14) -; CHECK-NEXT: lxvp 46, 0(15) -; CHECK-NEXT: lxvp 48, 0(17) +; CHECK-NEXT: lxvp 44, 0(16) +; CHECK-NEXT: lxvp 46, 0(17) +; CHECK-NEXT: lxvp 48, 0(18) ; CHECK-NEXT: lxvp 50, 0(19) -; CHECK-NEXT: lxvp 30, 0(21) -; CHECK-NEXT: lxvp 28, 0(22) +; CHECK-NEXT: lxvp 30, 0(20) +; CHECK-NEXT: lxvp 28, 0(21) ; CHECK-NEXT: lxvp 26, 32(6) -; CHECK-NEXT: lxvp 24, 32(14) -; CHECK-NEXT: lxvp 22, 32(15) -; CHECK-NEXT: lxvp 20, 32(17) +; CHECK-NEXT: lxvp 24, 32(16) +; CHECK-NEXT: lxvp 22, 32(17) +; CHECK-NEXT: lxvp 20, 32(18) ; CHECK-NEXT: lxvp 18, 32(19) ; CHECK-NEXT: addi 6, 6, 64 -; CHECK-NEXT: addi 14, 14, 64 -; CHECK-NEXT: addi 15, 15, 64 +; CHECK-NEXT: addi 16, 16, 64 ; CHECK-NEXT: addi 17, 17, 64 +; CHECK-NEXT: addi 18, 18, 64 ; CHECK-NEXT: addi 19, 19, 64 -; CHECK-NEXT: xvmaddadp 4, 45, 43 -; CHECK-NEXT: xvmaddadp 1, 47, 43 -; CHECK-NEXT: xvmaddadp 0, 49, 43 -; CHECK-NEXT: xvmaddadp 2, 51, 43 -; CHECK-NEXT: xvmaddadp 41, 31, 43 -; CHECK-NEXT: xvmaddadp 3, 29, 43 -; CHECK-NEXT: xvmaddadp 40, 44, 42 -; CHECK-NEXT: xvmaddadp 39, 46, 42 -; CHECK-NEXT: xvmaddadp 38, 48, 42 -; CHECK-NEXT: xvmaddadp 33, 50, 42 -; CHECK-NEXT: xvmaddadp 32, 30, 42 -; CHECK-NEXT: xvmaddadp 37, 28, 42 -; CHECK-NEXT: lxvp 42, 32(21) -; CHECK-NEXT: lxvp 44, 32(22) +; CHECK-NEXT: xvmaddadp 41, 45, 43 +; CHECK-NEXT: xvmaddadp 40, 47, 43 +; CHECK-NEXT: xvmaddadp 39, 49, 43 +; CHECK-NEXT: xvmaddadp 38, 51, 43 +; CHECK-NEXT: xvmaddadp 33, 31, 43 +; CHECK-NEXT: xvmaddadp 32, 29, 43 +; CHECK-NEXT: xvmaddadp 37, 44, 42 +; CHECK-NEXT: xvmaddadp 35, 46, 42 +; CHECK-NEXT: xvmaddadp 13, 48, 42 +; CHECK-NEXT: xvmaddadp 11, 50, 42 +; CHECK-NEXT: xvmaddadp 10, 30, 42 +; CHECK-NEXT: xvmaddadp 8, 28, 42 +; CHECK-NEXT: lxvp 42, 32(20) +; CHECK-NEXT: lxvp 44, 32(21) +; CHECK-NEXT: addi 20, 20, 64 ; CHECK-NEXT: addi 21, 21, 64 -; CHECK-NEXT: addi 22, 22, 64 -; CHECK-NEXT: xvmaddadp 36, 25, 27 -; CHECK-NEXT: xvmaddadp 35, 23, 27 -; CHECK-NEXT: xvmaddadp 34, 21, 27 -; CHECK-NEXT: xvmaddadp 13, 19, 27 -; CHECK-NEXT: xvmaddadp 12, 24, 26 -; CHECK-NEXT: xvmaddadp 10, 22, 26 -; CHECK-NEXT: xvmaddadp 8, 20, 26 -; CHECK-NEXT: xvmaddadp 7, 18, 26 -; CHECK-NEXT: xvmaddadp 11, 43, 27 -; CHECK-NEXT: xvmaddadp 9, 45, 27 -; CHECK-NEXT: xvmaddadp 6, 42, 26 +; CHECK-NEXT: xvmaddadp 6, 25, 27 +; CHECK-NEXT: xvmaddadp 4, 23, 27 +; CHECK-NEXT: xvmaddadp 3, 21, 27 +; CHECK-NEXT: xvmaddadp 2, 19, 27 +; CHECK-NEXT: xvmaddadp 36, 24, 26 +; CHECK-NEXT: xvmaddadp 34, 22, 26 +; CHECK-NEXT: xvmaddadp 12, 20, 26 +; CHECK-NEXT: xvmaddadp 9, 18, 26 +; CHECK-NEXT: xvmaddadp 1, 43, 27 +; CHECK-NEXT: xvmaddadp 0, 45, 27 +; CHECK-NEXT: xvmaddadp 7, 42, 26 ; CHECK-NEXT: xvmaddadp 5, 44, 26 ; CHECK-NEXT: bdnz .LBB0_4 ; CHECK-NEXT: # %bb.5: # %_loop_2_endl_ ; CHECK-NEXT: # -; CHECK-NEXT: addi 26, 26, 6 -; CHECK-NEXT: add 5, 5, 27 -; CHECK-NEXT: add 31, 31, 27 -; CHECK-NEXT: add 2, 2, 27 -; CHECK-NEXT: add 16, 16, 27 -; CHECK-NEXT: add 30, 30, 27 -; CHECK-NEXT: add 28, 28, 27 -; CHECK-NEXT: addi 25, 25, 1 -; CHECK-NEXT: cmpld 26, 4 +; CHECK-NEXT: addi 25, 25, 6 +; CHECK-NEXT: add 5, 5, 26 +; CHECK-NEXT: add 11, 11, 26 +; CHECK-NEXT: add 30, 30, 26 +; CHECK-NEXT: add 12, 12, 26 +; CHECK-NEXT: add 29, 29, 26 +; CHECK-NEXT: add 27, 27, 26 +; CHECK-NEXT: addi 24, 24, 1 +; CHECK-NEXT: cmpld 25, 4 ; CHECK-NEXT: ble 0, .LBB0_3 ; CHECK-NEXT: # %bb.6: # %_loop_1_loopHeader_._return_bb_crit_edge.loopexit -; CHECK-NEXT: ld 4, 168(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 4, 0(4) -; CHECK-NEXT: ld 4, 176(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 1, 0(4) -; CHECK-NEXT: ld 4, 184(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 0, 0(4) -; CHECK-NEXT: ld 4, 128(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 2, 0(4) -; CHECK-NEXT: ld 4, 96(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 41, 0(8) -; CHECK-NEXT: stxv 3, 0(4) -; CHECK-NEXT: stxv 40, 0(3) ; CHECK-NEXT: ld 3, 32(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 39, 0(3) +; CHECK-NEXT: stxv 41, 0(3) ; CHECK-NEXT: ld 3, 40(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 38, 0(3) +; CHECK-NEXT: stxv 40, 0(3) ; CHECK-NEXT: ld 3, 48(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 33, 0(3) +; CHECK-NEXT: stxv 39, 0(3) ; CHECK-NEXT: ld 3, 56(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 32, 0(3) +; CHECK-NEXT: stxv 38, 0(3) ; CHECK-NEXT: ld 3, 64(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 37, 0(3) +; CHECK-NEXT: stxv 33, 0(3) ; CHECK-NEXT: ld 3, 72(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 36, 0(3) +; CHECK-NEXT: stxv 32, 0(3) ; CHECK-NEXT: ld 3, 80(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 35, 0(3) +; CHECK-NEXT: stxv 37, 0(3) ; CHECK-NEXT: ld 3, 88(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 34, 0(3) -; CHECK-NEXT: ld 3, 104(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 35, 0(3) +; CHECK-NEXT: ld 3, 96(1) # 8-byte Folded Reload ; CHECK-NEXT: stxv 13, 0(3) -; CHECK-NEXT: ld 3, 112(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 3, 104(1) # 8-byte Folded Reload ; CHECK-NEXT: stxv 11, 0(3) +; CHECK-NEXT: ld 3, 112(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 10, 0(3) ; CHECK-NEXT: ld 3, 120(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 9, 0(3) +; CHECK-NEXT: stxv 8, 0(3) +; CHECK-NEXT: ld 3, 128(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 6, 0(3) ; CHECK-NEXT: ld 3, 136(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 12, 0(9) -; CHECK-NEXT: stxv 10, 0(3) +; CHECK-NEXT: stxv 4, 0(3) ; CHECK-NEXT: ld 3, 144(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 8, 0(3) +; CHECK-NEXT: stxv 3, 0(3) ; CHECK-NEXT: ld 3, 152(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 7, 0(3) +; CHECK-NEXT: stxv 2, 0(3) ; CHECK-NEXT: ld 3, 160(1) # 8-byte Folded Reload -; CHECK-NEXT: stxv 6, 0(3) -; CHECK-NEXT: stxv 5, 0(10) +; CHECK-NEXT: stxv 1, 0(3) +; CHECK-NEXT: ld 3, 168(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 0, 0(3) +; CHECK-NEXT: ld 3, 176(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 36, 0(3) +; CHECK-NEXT: ld 3, 184(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 34, 0(3) +; CHECK-NEXT: ld 3, 192(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 12, 0(3) +; CHECK-NEXT: ld 3, 200(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 9, 0(3) +; CHECK-NEXT: ld 3, 208(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 7, 0(3) +; CHECK-NEXT: ld 3, 216(1) # 8-byte Folded Reload +; CHECK-NEXT: stxv 5, 0(3) ; CHECK-NEXT: .LBB0_7: # %_return_bb -; CHECK-NEXT: lfd 31, 440(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 30, 432(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 31, 328(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 30, 320(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 29, 312(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 28, 304(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 27, 296(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 26, 288(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 25, 280(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 29, 424(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 24, 272(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 23, 264(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 22, 256(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 28, 416(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 21, 248(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 20, 240(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 19, 232(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 27, 408(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 18, 224(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 17, 216(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 16, 208(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 26, 400(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 15, 200(1) # 8-byte Folded Reload -; CHECK-NEXT: ld 14, 192(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 25, 392(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 24, 384(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 23, 376(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 22, 368(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 21, 360(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 20, 352(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 19, 344(1) # 8-byte Folded Reload -; CHECK-NEXT: lfd 18, 336(1) # 8-byte Folded Reload -; CHECK-NEXT: addi 1, 1, 448 +; CHECK-NEXT: lfd 31, 472(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 30, 464(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 31, 360(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 30, 352(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 29, 344(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 28, 336(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 27, 328(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 26, 320(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 25, 312(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 29, 456(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 24, 304(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 23, 296(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 22, 288(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 28, 448(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 21, 280(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 20, 272(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 19, 264(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 27, 440(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 18, 256(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 17, 248(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 16, 240(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 26, 432(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 15, 232(1) # 8-byte Folded Reload +; CHECK-NEXT: ld 14, 224(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 25, 424(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 24, 416(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 23, 408(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 22, 400(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 21, 392(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 20, 384(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 19, 376(1) # 8-byte Folded Reload +; CHECK-NEXT: lfd 18, 368(1) # 8-byte Folded Reload +; CHECK-NEXT: addi 1, 1, 480 ; CHECK-NEXT: blr entry: %_val_l_ = load i32, i32* %.l, align 4 diff --git a/llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll b/llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll --- a/llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll +++ b/llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll @@ -375,9 +375,9 @@ ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: ld 12, 384(1) # 8-byte Folded Reload ; CHECK-NEXT: lwz 4, 396(1) # 4-byte Folded Reload -; CHECK-NEXT: addi 4, 4, 1 ; CHECK-NEXT: std 3, 0(12) ; CHECK-NEXT: ld 12, 376(1) # 8-byte Folded Reload +; CHECK-NEXT: addi 4, 4, 1 ; CHECK-NEXT: std 3, 0(12) ; CHECK-NEXT: ld 12, 368(1) # 8-byte Folded Reload ; CHECK-NEXT: std 3, 0(12) diff --git a/llvm/test/CodeGen/PowerPC/sched-addi.ll b/llvm/test/CodeGen/PowerPC/sched-addi.ll --- a/llvm/test/CodeGen/PowerPC/sched-addi.ll +++ b/llvm/test/CodeGen/PowerPC/sched-addi.ll @@ -15,8 +15,8 @@ ; CHECK-P9-NEXT: ld 5, 0(5) ; CHECK-P9-NEXT: addis 6, 2, scalars@toc@ha ; CHECK-P9-NEXT: addi 6, 6, scalars@toc@l -; CHECK-P9-NEXT: addi 6, 6, 16 ; CHECK-P9-NEXT: rldicr 5, 5, 0, 58 +; CHECK-P9-NEXT: addi 6, 6, 16 ; CHECK-P9-NEXT: addi 5, 5, -32 ; CHECK-P9-NEXT: lxvdsx 0, 0, 6 ; CHECK-P9-NEXT: rldicl 5, 5, 59, 5 @@ -35,9 +35,9 @@ ; CHECK-P9-NEXT: xvmuldp 1, 1, 0 ; CHECK-P9-NEXT: xvmuldp 4, 4, 0 ; CHECK-P9-NEXT: xvmuldp 3, 3, 0 +; CHECK-P9-NEXT: xvmuldp 6, 6, 0 ; CHECK-P9-NEXT: xvmuldp 5, 5, 0 ; CHECK-P9-NEXT: addi 4, 4, 256 -; CHECK-P9-NEXT: xvmuldp 6, 6, 0 ; CHECK-P9-NEXT: stxv 1, 16(3) ; CHECK-P9-NEXT: stxv 2, 0(3) ; CHECK-P9-NEXT: stxv 3, 48(3) diff --git a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll --- a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll +++ b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll @@ -22,22 +22,22 @@ ; CHECK-NEXT: isellt 3, 3, 4 ; CHECK-NEXT: li 4, 0 ; CHECK-NEXT: addi 3, 3, 1 -; CHECK-NEXT: li 7, -1 ; CHECK-NEXT: li 5, 0 +; CHECK-NEXT: li 7, -1 ; CHECK-NEXT: mtctr 3 -; CHECK-NEXT: li 3, 1 ; CHECK-NEXT: lbz 5, 0(5) +; CHECK-NEXT: li 3, 1 ; CHECK-NEXT: bdz .LBB0_6 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: addi 3, 3, 1 -; CHECK-NEXT: addi 8, 7, -1 ; CHECK-NEXT: xori 6, 5, 84 ; CHECK-NEXT: clrldi 5, 7, 32 +; CHECK-NEXT: addi 3, 3, 1 +; CHECK-NEXT: addi 8, 7, -1 ; CHECK-NEXT: lbz 5, 0(5) ; CHECK-NEXT: bdz .LBB0_5 ; CHECK-NEXT: # %bb.2: -; CHECK-NEXT: addi 3, 3, 1 ; CHECK-NEXT: cntlzw 6, 6 +; CHECK-NEXT: addi 3, 3, 1 ; CHECK-NEXT: srwi 7, 6, 5 ; CHECK-NEXT: xori 6, 5, 84 ; CHECK-NEXT: clrldi 5, 8, 32 @@ -46,12 +46,12 @@ ; CHECK-NEXT: bdz .LBB0_4 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_3: -; CHECK-NEXT: addi 3, 3, 1 ; CHECK-NEXT: clrldi 10, 8, 32 -; CHECK-NEXT: addi 8, 8, -1 ; CHECK-NEXT: cntlzw 9, 6 ; CHECK-NEXT: xori 6, 5, 84 +; CHECK-NEXT: addi 8, 8, -1 ; CHECK-NEXT: lbz 5, 0(10) +; CHECK-NEXT: addi 3, 3, 1 ; CHECK-NEXT: add 4, 4, 7 ; CHECK-NEXT: srwi 7, 9, 5 ; CHECK-NEXT: bdnz .LBB0_3 diff --git a/llvm/test/CodeGen/PowerPC/sms-phi-1.ll b/llvm/test/CodeGen/PowerPC/sms-phi-1.ll --- a/llvm/test/CodeGen/PowerPC/sms-phi-1.ll +++ b/llvm/test/CodeGen/PowerPC/sms-phi-1.ll @@ -14,17 +14,17 @@ ; CHECK-NEXT: mr 30, 3 ; CHECK-NEXT: bl calloc ; CHECK-NEXT: nop +; CHECK-NEXT: clrldi 4, 30, 32 ; CHECK-NEXT: li 5, 0 ; CHECK-NEXT: addi 3, 3, -4 -; CHECK-NEXT: li 6, 1 -; CHECK-NEXT: clrldi 4, 30, 32 ; CHECK-NEXT: mtctr 4 ; CHECK-NEXT: mullw 4, 5, 5 +; CHECK-NEXT: li 6, 1 ; CHECK-NEXT: bdz .LBB0_3 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: addi 5, 6, 1 ; CHECK-NEXT: stwu 4, 4(3) ; CHECK-NEXT: mullw 4, 6, 6 +; CHECK-NEXT: addi 5, 6, 1 ; CHECK-NEXT: bdz .LBB0_3 ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: .LBB0_2: diff --git a/llvm/test/CodeGen/PowerPC/sms-simple.ll b/llvm/test/CodeGen/PowerPC/sms-simple.ll --- a/llvm/test/CodeGen/PowerPC/sms-simple.ll +++ b/llvm/test/CodeGen/PowerPC/sms-simple.ll @@ -9,15 +9,15 @@ define dso_local i32* @foo() local_unnamed_addr { ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: addis r5, r2, x@toc@ha -; CHECK-NEXT: addis r6, r2, y@toc@ha +; CHECK-NEXT: addis r5, r2, y@toc@ha ; CHECK-NEXT: li r7, 340 -; CHECK-NEXT: addi r5, r5, x@toc@l -; CHECK-NEXT: addi r5, r5, -8 -; CHECK-NEXT: addi r3, r6, y@toc@l -; CHECK-NEXT: lwz r6, y@toc@l(r6) +; CHECK-NEXT: addi r3, r5, y@toc@l +; CHECK-NEXT: lwz r6, y@toc@l(r5) +; CHECK-NEXT: addis r5, r2, x@toc@ha ; CHECK-NEXT: mtctr r7 +; CHECK-NEXT: addi r5, r5, x@toc@l ; CHECK-NEXT: addi r4, r3, -8 +; CHECK-NEXT: addi r5, r5, -8 ; CHECK-NEXT: lwzu r7, 12(r5) ; CHECK-NEXT: maddld r6, r7, r7, r6 ; CHECK-NEXT: lwz r7, 4(r5) diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll b/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll --- a/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll +++ b/llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll @@ -50,9 +50,9 @@ ; CHECK-P9-LE-NEXT: std r31, -8(r1) ; CHECK-P9-LE-NEXT: stdu r1, -48(r1) ; CHECK-P9-LE-NEXT: rldic r3, r3, 2, 30 -; CHECK-P9-LE-NEXT: addi r3, r3, 15 ; CHECK-P9-LE-NEXT: li r6, -32768 ; CHECK-P9-LE-NEXT: mr r31, r1 +; CHECK-P9-LE-NEXT: addi r3, r3, 15 ; CHECK-P9-LE-NEXT: rldicl r3, r3, 60, 4 ; CHECK-P9-LE-NEXT: rldicl r3, r3, 4, 29 ; CHECK-P9-LE-NEXT: neg r5, r3 @@ -189,9 +189,9 @@ ; CHECK-P9-LE-NEXT: std r31, -8(r1) ; CHECK-P9-LE-NEXT: stdu r1, -48(r1) ; CHECK-P9-LE-NEXT: rldic r4, r3, 2, 30 -; CHECK-P9-LE-NEXT: addi r4, r4, 15 ; CHECK-P9-LE-NEXT: li r7, -4096 ; CHECK-P9-LE-NEXT: mr r31, r1 +; CHECK-P9-LE-NEXT: addi r4, r4, 15 ; CHECK-P9-LE-NEXT: rldicl r4, r4, 60, 4 ; CHECK-P9-LE-NEXT: rldicl r4, r4, 4, 29 ; CHECK-P9-LE-NEXT: neg r6, r4 @@ -333,10 +333,10 @@ ; CHECK-P9-LE-NEXT: std r31, -8(r1) ; CHECK-P9-LE-NEXT: stdu r1, -48(r1) ; CHECK-P9-LE-NEXT: rldic r3, r3, 2, 30 -; CHECK-P9-LE-NEXT: addi r3, r3, 15 ; CHECK-P9-LE-NEXT: lis r5, -1 -; CHECK-P9-LE-NEXT: ori r5, r5, 0 ; CHECK-P9-LE-NEXT: mr r31, r1 +; CHECK-P9-LE-NEXT: addi r3, r3, 15 +; CHECK-P9-LE-NEXT: ori r5, r5, 0 ; CHECK-P9-LE-NEXT: rldicl r3, r3, 60, 4 ; CHECK-P9-LE-NEXT: rldicl r3, r3, 4, 29 ; CHECK-P9-LE-NEXT: neg r6, r3