Index: llvm/include/llvm/CodeGen/ModuloSchedule.h =================================================================== --- llvm/include/llvm/CodeGen/ModuloSchedule.h +++ llvm/include/llvm/CodeGen/ModuloSchedule.h @@ -192,7 +192,8 @@ ValueMapTy *VRMap, MBBVectorTy &PrologBBs); void generateEpilog(unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB, ValueMapTy *VRMap, - MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs); + ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs, + MBBVectorTy &PrologBBs); void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap, @@ -200,8 +201,9 @@ bool IsLast); void generatePhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, MachineBasicBlock *KernelBB, - ValueMapTy *VRMap, InstrMapTy &InstrMap, - unsigned LastStageNum, unsigned CurStageNum, bool IsLast); + ValueMapTy *VRMap, ValueMapTy *VRMapPhi, + InstrMapTy &InstrMap, unsigned LastStageNum, + unsigned CurStageNum, bool IsLast); void removeDeadInstructions(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs); void splitLifetimes(MachineBasicBlock *KernelBB, MBBVectorTy &EpilogBBs); Index: llvm/lib/CodeGen/ModuloSchedule.cpp =================================================================== --- llvm/lib/CodeGen/ModuloSchedule.cpp +++ llvm/lib/CodeGen/ModuloSchedule.cpp @@ -116,6 +116,12 @@ // a map between register names in the original block and the names created // in each stage of the pipelined loop. ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2]; + + // The renaming destination by Phis for the registers across stages. + // This map is updated during Phis generation to point to the most recent + // renaming destination. + ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2]; + InstrMapTy InstrMap; SmallVector PrologBBs; @@ -151,14 +157,15 @@ generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap, MaxStageCount, MaxStageCount, false); - generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap, - MaxStageCount, MaxStageCount, false); + generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi, + InstrMap, MaxStageCount, MaxStageCount, false); LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump();); SmallVector EpilogBBs; // Generate the epilog instructions to complete the pipeline. - generateEpilog(MaxStageCount, KernelBB, BB, VRMap, EpilogBBs, PrologBBs); + generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs, + PrologBBs); // We need this step because the register allocation doesn't handle some // situations well, so we insert copies to help out. @@ -171,6 +178,7 @@ addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap); delete[] VRMap; + delete[] VRMapPhi; } void ModuloScheduleExpander::cleanup() { @@ -242,7 +250,8 @@ /// block for each stage that needs to complete. void ModuloScheduleExpander::generateEpilog( unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB, - ValueMapTy *VRMap, MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs) { + ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs, + MBBVectorTy &PrologBBs) { // We need to change the branch from the kernel to the first epilog block, so // this call to analyze branch uses the kernel rather than the original BB. MachineBasicBlock *TBB = nullptr, *FBB = nullptr; @@ -296,8 +305,8 @@ } generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap, LastStage, EpilogStage, i == 1); - generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap, - LastStage, EpilogStage, i == 1); + generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi, + InstrMap, LastStage, EpilogStage, i == 1); PredBB = NewBB; LLVM_DEBUG({ @@ -593,8 +602,9 @@ /// use in the pipelined sequence. void ModuloScheduleExpander::generatePhis( MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2, - MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap, - unsigned LastStageNum, unsigned CurStageNum, bool IsLast) { + MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi, + InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum, + bool IsLast) { // Compute the stage number that contains the initial Phi value, and // the Phi from the previous stage. unsigned PrologStage = 0; @@ -631,26 +641,49 @@ if (!InKernel && (unsigned)StageScheduled > PrologStage) continue; - unsigned PhiOp2 = VRMap[PrevStage][Def]; - if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2)) - if (InstOp2->isPHI() && InstOp2->getParent() == NewBB) - PhiOp2 = getLoopPhiReg(*InstOp2, BB2); + unsigned PhiOp2; + if (InKernel) { + PhiOp2 = VRMap[PrevStage][Def]; + if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2)) + if (InstOp2->isPHI() && InstOp2->getParent() == NewBB) + PhiOp2 = getLoopPhiReg(*InstOp2, BB2); + } // The number of Phis can't exceed the number of prolog stages. The // prolog stage number is zero based. if (NumPhis > PrologStage + 1 - StageScheduled) NumPhis = PrologStage + 1 - StageScheduled; for (unsigned np = 0; np < NumPhis; ++np) { + // Example for + // Org: + // %Org = ... (Scheduled at Stage#0, NumPhi = 2) + // + // Prolog0 (Stage0): + // %Clone0 = ... + // Prolog1 (Stage1): + // %Clone1 = ... + // Kernel (Stage2): + // %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel + // %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel + // %Clone2 = ... + // Epilog0 (Stage3): + // %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel + // %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel + // Epilog1 (Stage4): + // %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0 + // + // VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2} + // VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0} + // VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2} + unsigned PhiOp1 = VRMap[PrologStage][Def]; if (np <= PrologStage) PhiOp1 = VRMap[PrologStage - np][Def]; - if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) { - if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB) - PhiOp1 = getInitPhiReg(*InstOp1, KernelBB); - if (InstOp1->isPHI() && InstOp1->getParent() == NewBB) - PhiOp1 = getInitPhiReg(*InstOp1, NewBB); + if (!InKernel) { + if (PrevStage == LastStageNum && np == 0) + PhiOp2 = VRMap[LastStageNum][Def]; + else + PhiOp2 = VRMapPhi[PrevStage - np][Def]; } - if (!InKernel) - PhiOp2 = VRMap[PrevStage - np][Def]; const TargetRegisterClass *RC = MRI.getRegClass(Def); Register NewReg = MRI.createVirtualRegister(RC); @@ -672,9 +705,9 @@ NewReg); PhiOp2 = NewReg; - VRMap[PrevStage - np - 1][Def] = NewReg; + VRMapPhi[PrevStage - np - 1][Def] = NewReg; } else { - VRMap[CurStageNum - np][Def] = NewReg; + VRMapPhi[CurStageNum - np][Def] = NewReg; if (np == NumPhis - 1) rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def, NewReg); Index: llvm/test/CodeGen/PowerPC/sms-large-stages.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/PowerPC/sms-large-stages.mir @@ -0,0 +1,296 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 --ppc-enable-pipeliner -pipeliner-max-stages=10 -run-pass=pipeliner -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK + +--- | + define dso_local void @f(ptr noalias nocapture noundef writeonly %a, ptr nocapture noundef readonly %b, i32 noundef signext %n) local_unnamed_addr #0 { + entry: + %wide.trip.count = zext i32 %n to i64 + %uglygep2 = getelementptr i8, ptr %b, i64 -4 + %uglygep3 = getelementptr i8, ptr %a, i64 -4 + call void @llvm.set.loop.iterations.i64(i64 %wide.trip.count) + br label %for.body + + for.cond.cleanup: ; preds = %for.body + ret void + + for.body: ; preds = %for.body, %entry + %0 = phi ptr [ %uglygep2, %entry ], [ %3, %for.body ] + %1 = phi ptr [ %uglygep3, %entry ], [ %2, %for.body ] + %2 = getelementptr i8, ptr %1, i64 4 + %3 = getelementptr i8, ptr %0, i64 4 + %4 = load float, ptr %3, align 4 + %add = fadd float %4, %4 + %add3 = fadd float %4, %add + store float %add3, ptr %2, align 4 + %5 = call i1 @llvm.loop.decrement.i64(i64 1) + br i1 %5, label %for.body, label %for.cond.cleanup, !llvm.loop !0 + } + + ; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn + declare void @llvm.set.loop.iterations.i64(i64) #1 + + ; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn + declare i1 @llvm.loop.decrement.i64(i64) #1 + + attributes #0 = { argmemonly nofree norecurse nosync nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crbits,+crypto,+direct-move,+extdiv,+htm,+isa-v206-instructions,+isa-v207-instructions,+isa-v30-instructions,+power8-vector,+power9-vector,+quadword-atomics,+vsx,-privileged,-rop-protect,-spe" } + attributes #1 = { nocallback noduplicate nofree nosync nounwind willreturn } + + !0 = distinct !{!0, !1, !2, !3} + !1 = !{!"llvm.loop.mustprogress"} + !2 = !{!"llvm.loop.unroll.disable"} + !3 = !{!"llvm.loop.pipeline.initiationinterval", i32 3} + +... +--- +name: f +alignment: 16 +tracksRegLiveness: true +registers: + - { id: 0, class: g8rc } + - { id: 1, class: g8rc } + - { id: 2, class: g8rc_and_g8rc_nox0 } + - { id: 3, class: g8rc_and_g8rc_nox0 } + - { id: 4, class: g8rc } + - { id: 5, class: g8rc } + - { id: 6, class: g8rc_and_g8rc_nox0 } + - { id: 7, class: g8rc_and_g8rc_nox0 } + - { id: 8, class: g8rc } + - { id: 9, class: g8rc } + - { id: 10, class: f4rc } + - { id: 11, class: g8rc_and_g8rc_nox0 } + - { id: 12, class: vssrc } + - { id: 13, class: f4rc } + - { id: 14, class: g8rc_and_g8rc_nox0 } +liveins: + - { reg: '$x3', virtual-reg: '%6' } + - { reg: '$x4', virtual-reg: '%7' } + - { reg: '$x5', virtual-reg: '%8' } +frameInfo: + maxAlignment: 1 +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: f + ; CHECK: bb.0.entry: + ; CHECK-NEXT: successors: %bb.3(0x80000000) + ; CHECK-NEXT: liveins: $x3, $x4, $x5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc = COPY $x5 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x4 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3 + ; CHECK-NEXT: [[RLDICL:%[0-9]+]]:g8rc = RLDICL [[COPY]], 0, 32 + ; CHECK-NEXT: [[ADDI8_:%[0-9]+]]:g8rc_and_g8rc_nox0 = ADDI8 [[COPY1]], -4 + ; CHECK-NEXT: [[ADDI8_1:%[0-9]+]]:g8rc = ADDI8 [[COPY2]], -4 + ; CHECK-NEXT: MTCTR8loop [[RLDICL]], implicit-def dead $ctr8 + ; CHECK-NEXT: B %bb.3 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1.for.cond.cleanup: + ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.3.for.body: + ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.17(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[LFSU:%[0-9]+]]:f4rc, [[LFSU1:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[ADDI8_]] :: (load (s32) from %ir.3) + ; CHECK-NEXT: [[COPY3:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[LFSU1]] + ; CHECK-NEXT: BDZ8 %bb.17, implicit-def $ctr8, implicit $ctr8 + ; CHECK-NEXT: B %bb.4 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.4.for.body: + ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.16(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[LFSU2:%[0-9]+]]:f4rc, [[LFSU3:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[COPY3]] :: (load unknown-size from %ir.3, align 4) + ; CHECK-NEXT: [[COPY4:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[LFSU3]] + ; CHECK-NEXT: BDZ8 %bb.16, implicit-def $ctr8, implicit $ctr8 + ; CHECK-NEXT: B %bb.5 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.5.for.body: + ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.15(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %21:vssrc = nofpexcept XSADDSP [[LFSU]], [[LFSU]] + ; CHECK-NEXT: [[LFSU4:%[0-9]+]]:f4rc, [[LFSU5:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[COPY4]] :: (load unknown-size from %ir.3, align 4) + ; CHECK-NEXT: [[COPY5:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[LFSU5]] + ; CHECK-NEXT: BDZ8 %bb.15, implicit-def $ctr8, implicit $ctr8 + ; CHECK-NEXT: B %bb.6 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.6.for.body: + ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.14(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %25:vssrc = nofpexcept XSADDSP [[LFSU2]], [[LFSU2]] + ; CHECK-NEXT: [[LFSU6:%[0-9]+]]:f4rc, [[LFSU7:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[COPY5]] :: (load unknown-size from %ir.3, align 4) + ; CHECK-NEXT: [[COPY6:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[LFSU7]] + ; CHECK-NEXT: BDZ8 %bb.14, implicit-def $ctr8, implicit $ctr8 + ; CHECK-NEXT: B %bb.7 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.7.for.body: + ; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.13(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %29:f4rc = nofpexcept XSADDSP [[LFSU]], %21 + ; CHECK-NEXT: %30:vssrc = nofpexcept XSADDSP [[LFSU4]], [[LFSU4]] + ; CHECK-NEXT: [[LFSU8:%[0-9]+]]:f4rc, [[LFSU9:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[COPY6]] :: (load unknown-size from %ir.3, align 4) + ; CHECK-NEXT: [[COPY7:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[LFSU9]] + ; CHECK-NEXT: BDZ8 %bb.13, implicit-def $ctr8, implicit $ctr8 + ; CHECK-NEXT: B %bb.8 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.8.for.body: + ; CHECK-NEXT: successors: %bb.9(0x40000000), %bb.12(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %34:f4rc = nofpexcept XSADDSP [[LFSU2]], %25 + ; CHECK-NEXT: %35:vssrc = nofpexcept XSADDSP [[LFSU6]], [[LFSU6]] + ; CHECK-NEXT: [[LFSU10:%[0-9]+]]:f4rc, [[LFSU11:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[COPY7]] :: (load unknown-size from %ir.3, align 4) + ; CHECK-NEXT: [[COPY8:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[LFSU11]] + ; CHECK-NEXT: BDZ8 %bb.12, implicit-def $ctr8, implicit $ctr8 + ; CHECK-NEXT: B %bb.9 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.9.for.body: + ; CHECK-NEXT: successors: %bb.10(0x80000000), %bb.11(0x00000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %39:f4rc = nofpexcept XSADDSP [[LFSU4]], %30 + ; CHECK-NEXT: %40:vssrc = nofpexcept XSADDSP [[LFSU8]], [[LFSU8]] + ; CHECK-NEXT: [[LFSU12:%[0-9]+]]:f4rc, [[LFSU13:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[COPY8]] :: (load unknown-size from %ir.3, align 4) + ; CHECK-NEXT: [[COPY9:%[0-9]+]]:g8rc = COPY [[LFSU13]] + ; CHECK-NEXT: BDZ8 %bb.11, implicit-def $ctr8, implicit $ctr8 + ; CHECK-NEXT: B %bb.10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.10.for.body: + ; CHECK-NEXT: successors: %bb.10(0x7c000000), %bb.11(0x04000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[COPY9]], %bb.9, %50, %bb.10 + ; CHECK-NEXT: [[PHI1:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.9, %47, %bb.10 + ; CHECK-NEXT: [[PHI2:%[0-9]+]]:f4rc = PHI [[LFSU12]], %bb.9, %45, %bb.10 + ; CHECK-NEXT: [[PHI3:%[0-9]+]]:f4rc = PHI [[LFSU10]], %bb.9, [[PHI2]], %bb.10 + ; CHECK-NEXT: [[PHI4:%[0-9]+]]:f4rc = PHI [[LFSU8]], %bb.9, [[PHI3]], %bb.10 + ; CHECK-NEXT: [[PHI5:%[0-9]+]]:f4rc = PHI [[LFSU6]], %bb.9, [[PHI4]], %bb.10 + ; CHECK-NEXT: [[PHI6:%[0-9]+]]:vssrc = PHI %40, %bb.9, %48, %bb.10 + ; CHECK-NEXT: [[PHI7:%[0-9]+]]:vssrc = PHI %35, %bb.9, [[PHI6]], %bb.10 + ; CHECK-NEXT: [[PHI8:%[0-9]+]]:f4rc = PHI %39, %bb.9, %49, %bb.10 + ; CHECK-NEXT: [[PHI9:%[0-9]+]]:f4rc = PHI %34, %bb.9, [[PHI8]], %bb.10 + ; CHECK-NEXT: [[PHI10:%[0-9]+]]:f4rc = PHI %29, %bb.9, [[PHI9]], %bb.10 + ; CHECK-NEXT: [[STFSU:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU [[PHI10]], 4, [[PHI1]] :: (store (s32) into %ir.2) + ; CHECK-NEXT: [[LFSU14:%[0-9]+]]:f4rc, [[LFSU15:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[PHI]] :: (load unknown-size from %ir.3, align 4) + ; CHECK-NEXT: [[COPY10:%[0-9]+]]:g8rc = COPY [[STFSU]] + ; CHECK-NEXT: %48:vssrc = nofpexcept XSADDSP [[PHI3]], [[PHI3]] + ; CHECK-NEXT: %49:f4rc = nofpexcept XSADDSP [[PHI5]], [[PHI7]] + ; CHECK-NEXT: [[COPY11:%[0-9]+]]:g8rc = COPY [[LFSU15]] + ; CHECK-NEXT: BDNZ8 %bb.10, implicit-def $ctr8, implicit $ctr8 + ; CHECK-NEXT: B %bb.11 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.11: + ; CHECK-NEXT: successors: %bb.12(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI11:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.9, [[COPY10]], %bb.10 + ; CHECK-NEXT: [[PHI12:%[0-9]+]]:f4rc = PHI [[LFSU12]], %bb.9, [[LFSU14]], %bb.10 + ; CHECK-NEXT: [[PHI13:%[0-9]+]]:f4rc = PHI [[LFSU10]], %bb.9, [[PHI2]], %bb.10 + ; CHECK-NEXT: [[PHI14:%[0-9]+]]:f4rc = PHI [[LFSU8]], %bb.9, [[PHI3]], %bb.10 + ; CHECK-NEXT: [[PHI15:%[0-9]+]]:f4rc = PHI [[LFSU6]], %bb.9, [[PHI4]], %bb.10 + ; CHECK-NEXT: [[PHI16:%[0-9]+]]:vssrc = PHI %40, %bb.9, %48, %bb.10 + ; CHECK-NEXT: [[PHI17:%[0-9]+]]:vssrc = PHI %35, %bb.9, [[PHI6]], %bb.10 + ; CHECK-NEXT: [[PHI18:%[0-9]+]]:f4rc = PHI %39, %bb.9, %49, %bb.10 + ; CHECK-NEXT: [[PHI19:%[0-9]+]]:f4rc = PHI %34, %bb.9, [[PHI8]], %bb.10 + ; CHECK-NEXT: [[PHI20:%[0-9]+]]:f4rc = PHI %29, %bb.9, [[PHI9]], %bb.10 + ; CHECK-NEXT: [[STFSU1:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU [[PHI20]], 4, [[PHI11]] :: (store unknown-size into %ir.2, align 4) + ; CHECK-NEXT: [[COPY12:%[0-9]+]]:g8rc = COPY [[STFSU1]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.12: + ; CHECK-NEXT: successors: %bb.13(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI21:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.8, [[COPY12]], %bb.11 + ; CHECK-NEXT: [[PHI22:%[0-9]+]]:f4rc = PHI [[LFSU10]], %bb.8, [[PHI12]], %bb.11 + ; CHECK-NEXT: [[PHI23:%[0-9]+]]:f4rc = PHI [[LFSU8]], %bb.8, [[PHI13]], %bb.11 + ; CHECK-NEXT: [[PHI24:%[0-9]+]]:f4rc = PHI [[LFSU6]], %bb.8, [[PHI14]], %bb.11 + ; CHECK-NEXT: [[PHI25:%[0-9]+]]:f4rc = PHI [[LFSU4]], %bb.8, [[PHI15]], %bb.11 + ; CHECK-NEXT: [[PHI26:%[0-9]+]]:vssrc = PHI %35, %bb.8, [[PHI16]], %bb.11 + ; CHECK-NEXT: [[PHI27:%[0-9]+]]:vssrc = PHI %30, %bb.8, [[PHI17]], %bb.11 + ; CHECK-NEXT: [[PHI28:%[0-9]+]]:f4rc = PHI %34, %bb.8, [[PHI18]], %bb.11 + ; CHECK-NEXT: [[PHI29:%[0-9]+]]:f4rc = PHI %29, %bb.8, [[PHI19]], %bb.11 + ; CHECK-NEXT: [[STFSU2:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU [[PHI29]], 4, [[PHI21]] :: (store unknown-size into %ir.2, align 4) + ; CHECK-NEXT: [[COPY13:%[0-9]+]]:g8rc = COPY [[STFSU2]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.13: + ; CHECK-NEXT: successors: %bb.14(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI30:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.7, [[COPY13]], %bb.12 + ; CHECK-NEXT: [[PHI31:%[0-9]+]]:f4rc = PHI [[LFSU8]], %bb.7, [[PHI22]], %bb.12 + ; CHECK-NEXT: [[PHI32:%[0-9]+]]:f4rc = PHI [[LFSU6]], %bb.7, [[PHI23]], %bb.12 + ; CHECK-NEXT: [[PHI33:%[0-9]+]]:f4rc = PHI [[LFSU4]], %bb.7, [[PHI24]], %bb.12 + ; CHECK-NEXT: [[PHI34:%[0-9]+]]:f4rc = PHI [[LFSU2]], %bb.7, [[PHI25]], %bb.12 + ; CHECK-NEXT: [[PHI35:%[0-9]+]]:vssrc = PHI %30, %bb.7, [[PHI26]], %bb.12 + ; CHECK-NEXT: [[PHI36:%[0-9]+]]:vssrc = PHI %25, %bb.7, [[PHI27]], %bb.12 + ; CHECK-NEXT: [[PHI37:%[0-9]+]]:f4rc = PHI %29, %bb.7, [[PHI28]], %bb.12 + ; CHECK-NEXT: [[STFSU3:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU [[PHI37]], 4, [[PHI30]] :: (store unknown-size into %ir.2, align 4) + ; CHECK-NEXT: [[COPY14:%[0-9]+]]:g8rc = COPY [[STFSU3]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.14: + ; CHECK-NEXT: successors: %bb.15(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI38:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.6, [[COPY14]], %bb.13 + ; CHECK-NEXT: [[PHI39:%[0-9]+]]:f4rc = PHI [[LFSU6]], %bb.6, [[PHI31]], %bb.13 + ; CHECK-NEXT: [[PHI40:%[0-9]+]]:f4rc = PHI [[LFSU4]], %bb.6, [[PHI32]], %bb.13 + ; CHECK-NEXT: [[PHI41:%[0-9]+]]:f4rc = PHI [[LFSU2]], %bb.6, [[PHI33]], %bb.13 + ; CHECK-NEXT: [[PHI42:%[0-9]+]]:f4rc = PHI [[LFSU]], %bb.6, [[PHI34]], %bb.13 + ; CHECK-NEXT: [[PHI43:%[0-9]+]]:vssrc = PHI %25, %bb.6, [[PHI35]], %bb.13 + ; CHECK-NEXT: [[PHI44:%[0-9]+]]:vssrc = PHI %21, %bb.6, [[PHI36]], %bb.13 + ; CHECK-NEXT: %98:f4rc = nofpexcept XSADDSP [[PHI42]], [[PHI44]] + ; CHECK-NEXT: [[STFSU4:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU %98, 4, [[PHI38]] :: (store unknown-size into %ir.2, align 4) + ; CHECK-NEXT: [[COPY15:%[0-9]+]]:g8rc = COPY [[STFSU4]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.15: + ; CHECK-NEXT: successors: %bb.16(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI45:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.5, [[COPY15]], %bb.14 + ; CHECK-NEXT: [[PHI46:%[0-9]+]]:f4rc = PHI [[LFSU4]], %bb.5, [[PHI39]], %bb.14 + ; CHECK-NEXT: [[PHI47:%[0-9]+]]:f4rc = PHI [[LFSU2]], %bb.5, [[PHI40]], %bb.14 + ; CHECK-NEXT: [[PHI48:%[0-9]+]]:f4rc = PHI [[LFSU]], %bb.5, [[PHI41]], %bb.14 + ; CHECK-NEXT: [[PHI49:%[0-9]+]]:vssrc = PHI %21, %bb.5, [[PHI43]], %bb.14 + ; CHECK-NEXT: %109:f4rc = nofpexcept XSADDSP [[PHI48]], [[PHI49]] + ; CHECK-NEXT: [[STFSU5:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU %109, 4, [[PHI45]] :: (store unknown-size into %ir.2, align 4) + ; CHECK-NEXT: [[COPY16:%[0-9]+]]:g8rc = COPY [[STFSU5]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.16: + ; CHECK-NEXT: successors: %bb.17(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI50:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.4, [[COPY16]], %bb.15 + ; CHECK-NEXT: [[PHI51:%[0-9]+]]:f4rc = PHI [[LFSU2]], %bb.4, [[PHI46]], %bb.15 + ; CHECK-NEXT: [[PHI52:%[0-9]+]]:f4rc = PHI [[LFSU]], %bb.4, [[PHI47]], %bb.15 + ; CHECK-NEXT: %118:vssrc = nofpexcept XSADDSP [[PHI52]], [[PHI52]] + ; CHECK-NEXT: %119:f4rc = nofpexcept XSADDSP [[PHI52]], %118 + ; CHECK-NEXT: [[STFSU6:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU %119, 4, [[PHI50]] :: (store unknown-size into %ir.2, align 4) + ; CHECK-NEXT: [[COPY17:%[0-9]+]]:g8rc = COPY [[STFSU6]] + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.17: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[PHI53:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.3, [[COPY17]], %bb.16 + ; CHECK-NEXT: [[PHI54:%[0-9]+]]:f4rc = PHI [[LFSU]], %bb.3, [[PHI51]], %bb.16 + ; CHECK-NEXT: %126:vssrc = nofpexcept XSADDSP [[PHI54]], [[PHI54]] + ; CHECK-NEXT: %127:f4rc = nofpexcept XSADDSP [[PHI54]], %126 + ; CHECK-NEXT: [[STFSU7:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU %127, 4, [[PHI53]] :: (store unknown-size into %ir.2, align 4) + ; CHECK-NEXT: B %bb.1 + bb.0.entry: + liveins: $x3, $x4, $x5 + + %8:g8rc = COPY $x5 + %7:g8rc_and_g8rc_nox0 = COPY $x4 + %6:g8rc_and_g8rc_nox0 = COPY $x3 + %9:g8rc = RLDICL %8, 0, 32 + %0:g8rc = ADDI8 %7, -4 + %1:g8rc = ADDI8 %6, -4 + MTCTR8loop killed %9, implicit-def dead $ctr8 + B %bb.2 + + bb.1.for.cond.cleanup: + BLR8 implicit $lr8, implicit $rm + + bb.2.for.body: + successors: %bb.2(0x7c000000), %bb.1(0x04000000) + + %2:g8rc_and_g8rc_nox0 = PHI %0, %bb.0, %5, %bb.2 + %3:g8rc_and_g8rc_nox0 = PHI %1, %bb.0, %4, %bb.2 + %10:f4rc, %11:g8rc_and_g8rc_nox0 = LFSU 4, %2 :: (load (s32) from %ir.3) + %12:vssrc = nofpexcept XSADDSP %10, %10 + %13:f4rc = nofpexcept XSADDSP %10, killed %12 + %14:g8rc_and_g8rc_nox0 = STFSU killed %13, 4, %3 :: (store (s32) into %ir.2) + %4:g8rc = COPY %14 + %5:g8rc = COPY %11 + BDNZ8 %bb.2, implicit-def dead $ctr8, implicit $ctr8 + B %bb.1 + +...