diff --git a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
--- a/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -299,6 +299,27 @@
   bool translateBinaryOp(unsigned Opcode, const User &U,
                          MachineIRBuilder &MIRBuilder);
+  /// If the set of cases should be emitted as a series of branches, return
+  /// true. If we should emit this as a bunch of and/or'd together conditions,
+  /// return false.
+  bool shouldEmitAsBranches(const std::vector<SwitchCG::CaseBlock> &Cases);
+  /// Helper method for findMergedConditions.
+  /// This function emits a branch and is used at the leaves of an OR or an
+  /// AND operator tree.
+  void emitBranchForMergedCondition(const Value *Cond, MachineBasicBlock *TBB,
+                                    MachineBasicBlock *FBB,
+                                    MachineBasicBlock *CurBB,
+                                    MachineBasicBlock *SwitchBB,
+                                    BranchProbability TProb,
+                                    BranchProbability FProb, bool InvertCond);
+  /// Used during condbr translation to find trees of conditions that can be
+  /// optimized.
+  void findMergedConditions(const Value *Cond, MachineBasicBlock *TBB,
+                            MachineBasicBlock *FBB, MachineBasicBlock *CurBB,
+                            MachineBasicBlock *SwitchBB,
+                            Instruction::BinaryOps Opc, BranchProbability TProb,
+                            BranchProbability FProb, bool InvertCond);
+
   /// Translate branch (br) instruction.
   /// \pre \p U is a branch instruction.
   bool translateBr(const User &U, MachineIRBuilder &MIRBuilder);
diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
--- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -33,6 +33,7 @@
 #include "llvm/CodeGen/MachineOperand.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/SwitchLoweringUtils.h"
 #include "llvm/CodeGen/TargetFrameLowering.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
 #include "llvm/CodeGen/TargetLowering.h"
@@ -49,11 +50,13 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstrTypes.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/Metadata.h"
+#include "llvm/IR/PatternMatch.h"
 #include "llvm/IR/Type.h"
 #include "llvm/IR/User.h"
 #include "llvm/IR/Value.h"
@@ -360,28 +363,276 @@
   return CLI->lowerReturn(MIRBuilder, Ret, VRegs, SwiftErrorVReg);
 }

+void IRTranslator::emitBranchForMergedCondition(
+    const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+    MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
+    BranchProbability TProb, BranchProbability FProb, bool InvertCond) {
+  // If the leaf of the tree is a comparison, merge the condition into
+  // the caseblock.
+  if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
+    CmpInst::Predicate Condition;
+    if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
+      Condition = InvertCond ? IC->getInversePredicate() : IC->getPredicate();
+    } else {
+      const FCmpInst *FC = cast<FCmpInst>(Cond);
+      Condition = InvertCond ? FC->getInversePredicate() : FC->getPredicate();
+    }
+
+    SwitchCG::CaseBlock CB(Condition, false, BOp->getOperand(0),
+                           BOp->getOperand(1), nullptr, TBB, FBB, CurBB,
+                           CurBuilder->getDebugLoc(), TProb, FProb);
+    SL->SwitchCases.push_back(CB);
+    return;
+  }
+
+  // Create a CaseBlock record representing this branch.
+  CmpInst::Predicate Pred = InvertCond ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ;
+  SwitchCG::CaseBlock CB(
+      Pred, false, Cond, ConstantInt::getTrue(MF->getFunction().getContext()),
+      nullptr, TBB, FBB, CurBB, CurBuilder->getDebugLoc(), TProb, FProb);
+  SL->SwitchCases.push_back(CB);
+}
+
+static bool isValInBlock(const Value *V, const BasicBlock *BB) {
+  if (const Instruction *I = dyn_cast<Instruction>(V))
+    return I->getParent() == BB;
+  return true;
+}
+
+void IRTranslator::findMergedConditions(
+    const Value *Cond, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
+    MachineBasicBlock *CurBB, MachineBasicBlock *SwitchBB,
+    Instruction::BinaryOps Opc, BranchProbability TProb,
+    BranchProbability FProb, bool InvertCond) {
+  using namespace PatternMatch;
+  assert((Opc == Instruction::And || Opc == Instruction::Or) &&
+         "Expected Opc to be AND/OR");
+  // Skip over a NOT that is not part of the tree, and remember to invert the
+  // op and operands at the next level.
+  Value *NotCond;
+  if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
+      isValInBlock(NotCond, CurBB->getBasicBlock())) {
+    findMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
+                         !InvertCond);
+    return;
+  }
+
+  const Instruction *BOp = dyn_cast<Instruction>(Cond);
+  // Compute the effective opcode for Cond, taking into account whether it
+  // needs to be inverted, e.g.
+  //   and (not (or A, B)), C
+  // gets lowered as
+  //   and (and (not A, not B), C)
+  unsigned BOpc = 0;
+  if (BOp) {
+    BOpc = BOp->getOpcode();
+    if (InvertCond) {
+      if (BOpc == Instruction::And)
+        BOpc = Instruction::Or;
+      else if (BOpc == Instruction::Or)
+        BOpc = Instruction::And;
+    }
+  }
+
+  // If this node is not part of the or/and tree, emit it as a branch.
+  if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
+      BOpc != static_cast<unsigned>(Opc) || !BOp->hasOneUse() ||
+      BOp->getParent() != CurBB->getBasicBlock() ||
+      !isValInBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
+      !isValInBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
+    emitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB, TProb, FProb,
+                                 InvertCond);
+    return;
+  }
+
+  // Create TmpBB after CurBB.
+  MachineFunction::iterator BBI(CurBB);
+  MachineBasicBlock *TmpBB =
+      MF->CreateMachineBasicBlock(CurBB->getBasicBlock());
+  CurBB->getParent()->insert(++BBI, TmpBB);
+
+  if (Opc == Instruction::Or) {
+    // Codegen X | Y as:
+    // BB1:
+    //   jmp_if_X TBB
+    //   jmp TmpBB
+    // TmpBB:
+    //   jmp_if_Y TBB
+    //   jmp FBB
+    //
+
+    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+    // The requirement is that
+    //   TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
+    //   = TrueProb for original BB.
+    // Assuming the original probabilities are A and B, one choice is to set
+    // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
+    // A/(1+B) and 2B/(1+B). This choice assumes that
+    //   TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
+    // Another choice is to assume TrueProb for BB1 equals TrueProb for
+    // TmpBB, but the math is more complicated.
+
+    auto NewTrueProb = TProb / 2;
+    auto NewFalseProb = TProb / 2 + FProb;
+    // Emit the LHS condition.
+    findMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
+                         NewTrueProb, NewFalseProb, InvertCond);
+
+    // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
+    SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
+    BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
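+    // For example, with original edge probabilities A = B = 50%, BB1 gets
+    // 25%/75% and TmpBB gets 33%/67% after normalization, so the overall
+    // probability of reaching TBB is 25% + 75% * 33% ~= 50%, matching the
+    // original TrueProb. The And case below is symmetric.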
+    // Emit the RHS condition into TmpBB.
+    findMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
+                         Probs[0], Probs[1], InvertCond);
+  } else {
+    assert(Opc == Instruction::And && "Unknown merge op!");
+    // Codegen X & Y as:
+    // BB1:
+    //   jmp_if_X TmpBB
+    //   jmp FBB
+    // TmpBB:
+    //   jmp_if_Y TBB
+    //   jmp FBB
+    //
+    // This requires creation of TmpBB after CurBB.

+    // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
+    // The requirement is that
+    //   FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
+    //   = FalseProb for original BB.
+    // Assuming the original probabilities are A and B, one choice is to set
+    // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
+    // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
+    // TrueProb for BB1 * FalseProb for TmpBB.

+    auto NewTrueProb = TProb + FProb / 2;
+    auto NewFalseProb = FProb / 2;
+    // Emit the LHS condition.
+    findMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
+                         NewTrueProb, NewFalseProb, InvertCond);

+    // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
+    SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
+    BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
+    // Emit the RHS condition into TmpBB.
+    findMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
+                         Probs[0], Probs[1], InvertCond);
+  }
+}

+bool IRTranslator::shouldEmitAsBranches(
+    const std::vector<SwitchCG::CaseBlock> &Cases) {
+  // For multiple cases, it's better to emit as branches.
+  if (Cases.size() != 2)
+    return true;

+  // If this is two comparisons of the same values or'd or and'd together, they
+  // will get folded into a single comparison, so don't emit two blocks.
+  if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
+       Cases[0].CmpRHS == Cases[1].CmpRHS) ||
+      (Cases[0].CmpRHS == Cases[1].CmpLHS &&
+       Cases[0].CmpLHS == Cases[1].CmpRHS)) {
+    return false;
+  }

+  // Handle: (X != null) | (Y != null) --> (X|Y) != 0
+  // Handle: (X == null) & (Y == null) --> (X|Y) == 0
+  if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
+      Cases[0].PredInfo.Pred == Cases[1].PredInfo.Pred &&
+      isa<ConstantInt>(Cases[0].CmpRHS) &&
+      cast<ConstantInt>(Cases[0].CmpRHS)->isNullValue()) {
+    if (Cases[0].PredInfo.Pred == CmpInst::ICMP_EQ &&
+        Cases[0].TrueBB == Cases[1].ThisBB)
+      return false;
+    if (Cases[0].PredInfo.Pred == CmpInst::ICMP_NE &&
+        Cases[0].FalseBB == Cases[1].ThisBB)
+      return false;
+  }

+  return true;
+}

 bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
   const BranchInst &BrInst = cast<BranchInst>(U);
-  unsigned Succ = 0;
-  if (!BrInst.isUnconditional()) {
-    // We want a G_BRCOND to the true BB followed by an unconditional branch.
-    Register Tst = getOrCreateVReg(*BrInst.getCondition());
-    const BasicBlock &TrueTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ++));
-    MachineBasicBlock &TrueBB = getMBB(TrueTgt);
-    MIRBuilder.buildBrCond(Tst, TrueBB);
+  auto &CurMBB = MIRBuilder.getMBB();
+  auto *Succ0MBB = &getMBB(*BrInst.getSuccessor(0));
+
+  if (BrInst.isUnconditional()) {
+    // If the unconditional target is the layout successor, fallthrough.
+    if (!CurMBB.isLayoutSuccessor(Succ0MBB))
+      MIRBuilder.buildBr(*Succ0MBB);
+
+    // Link successors.
+    for (const BasicBlock *Succ : successors(&BrInst))
+      CurMBB.addSuccessor(&getMBB(*Succ));
+    return true;
   }

-  const BasicBlock &BrTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ));
-  MachineBasicBlock &TgtBB = getMBB(BrTgt);
-  MachineBasicBlock &CurBB = MIRBuilder.getMBB();
+  // If this condition is one of the special cases we handle, do special stuff
+  // now.
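+  // The "special case" is a single-use and/or tree of comparisons; anything
+  // else falls through to the generic CaseBlock lowering at the end of this
+  // function.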
+  const Value *CondVal = BrInst.getCondition();
+  MachineBasicBlock *Succ1MBB = &getMBB(*BrInst.getSuccessor(1));
-  // If the unconditional target is the layout successor, fallthrough.
-  if (!CurBB.isLayoutSuccessor(&TgtBB))
-    MIRBuilder.buildBr(TgtBB);
+  const auto &TLI = *MF->getSubtarget().getTargetLowering();
-  // Link successors.
-  for (const BasicBlock *Succ : successors(&BrInst))
-    CurBB.addSuccessor(&getMBB(*Succ));
+  // If this is a series of conditions that are or'd or and'd together, emit
+  // this as a sequence of branches instead of setcc's with and/or operations.
+  // As long as jumps are not expensive (exceptions for multi-use logic ops,
+  // unpredictable branches, and vector extracts because those jumps are likely
+  // expensive for any target), this should improve performance.
+  // For example, instead of something like:
+  //     cmp A, B
+  //     C = seteq
+  //     cmp D, E
+  //     F = setle
+  //     or C, F
+  //     jnz foo
+  // Emit:
+  //     cmp A, B
+  //     je foo
+  //     cmp D, E
+  //     jle foo
+  using namespace PatternMatch;
+  if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
+    Instruction::BinaryOps Opcode = BOp->getOpcode();
+    Value *Vec, *BOp0 = BOp->getOperand(0), *BOp1 = BOp->getOperand(1);
+    if (!TLI.isJumpExpensive() && BOp->hasOneUse() &&
+        !BrInst.hasMetadata(LLVMContext::MD_unpredictable) &&
+        (Opcode == Instruction::And || Opcode == Instruction::Or) &&
+        !(match(BOp0, m_ExtractElt(m_Value(Vec), m_Value())) &&
+          match(BOp1, m_ExtractElt(m_Specific(Vec), m_Value())))) {
+      findMergedConditions(BOp, Succ0MBB, Succ1MBB, &CurMBB, &CurMBB, Opcode,
+                           getEdgeProbability(&CurMBB, Succ0MBB),
+                           getEdgeProbability(&CurMBB, Succ1MBB),
+                           /*InvertCond=*/false);
+      assert(SL->SwitchCases[0].ThisBB == &CurMBB && "Unexpected lowering!");

+      // Allow some cases to be rejected.
+      if (shouldEmitAsBranches(SL->SwitchCases)) {
+        // Emit the branch for this block.
+        emitSwitchCase(SL->SwitchCases[0], &CurMBB, *CurBuilder);
+        SL->SwitchCases.erase(SL->SwitchCases.begin());
+        return true;
+      }

+      // Okay, we decided not to do this, remove any inserted MBB's and clear
+      // SwitchCases.
+      for (unsigned I = 1, E = SL->SwitchCases.size(); I != E; ++I)
+        MF->erase(SL->SwitchCases[I].ThisBB);

+      SL->SwitchCases.clear();
+    }
+  }

+  // Create a CaseBlock record representing this branch.
+  SwitchCG::CaseBlock CB(CmpInst::ICMP_EQ, false, CondVal,
+                         ConstantInt::getTrue(MF->getFunction().getContext()),
+                         nullptr, Succ0MBB, Succ1MBB, &CurMBB,
+                         CurBuilder->getDebugLoc());

+  // Use emitSwitchCase to actually insert the fast branch sequence for this
+  // cond branch.
+  emitSwitchCase(CB, &CurMBB, *CurBuilder);
   return true;
 }
@@ -567,8 +818,23 @@
   const LLT i1Ty = LLT::scalar(1);
   // Build the compare.
   if (!CB.CmpMHS) {
-    Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
-    Cond = MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
+    const auto *CI = dyn_cast<ConstantInt>(CB.CmpRHS);
+    // For conditional branch lowering, we might try to do something silly like
+    // emit a G_ICMP to compare an existing G_ICMP i1 result with true. If so,
+    // just re-use the existing condition vreg.
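+    // (translateBr produces exactly this pattern when it lowers a plain br on
+    // an i1 value through a CaseBlock comparing the condition against
+    // ConstantInt::getTrue.)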
+    if (CI && CI->getZExtValue() == 1 &&
+        MRI->getType(CondLHS).getSizeInBits() == 1 &&
+        CB.PredInfo.Pred == CmpInst::ICMP_EQ) {
+      Cond = CondLHS;
+    } else {
+      Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
+      if (CmpInst::isFPPredicate(CB.PredInfo.Pred))
+        Cond =
+            MIB.buildFCmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
+      else
+        Cond =
+            MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
+    }
   } else {
     assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE &&
            "Can only handle SLE ranges");
@@ -601,17 +867,8 @@
   addSuccessorWithProb(CB.ThisBB, CB.FalseBB, CB.FalseProb);
   CB.ThisBB->normalizeSuccProbs();

-  // if (SwitchBB->getBasicBlock() != CB.FalseBB->getBasicBlock())
-    addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
-                      CB.ThisBB);
-
-  // If the lhs block is the next block, invert the condition so that we can
-  // fall through to the lhs instead of the rhs block.
-  if (CB.TrueBB == CB.ThisBB->getNextNode()) {
-    std::swap(CB.TrueBB, CB.FalseBB);
-    auto True = MIB.buildConstant(i1Ty, 1);
-    Cond = MIB.buildXor(i1Ty, Cond, True).getReg(0);
-  }
+  addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
+                    CB.ThisBB);

   MIB.buildBrCond(Cond, *CB.TrueBB);
   MIB.buildBr(*CB.FalseBB);
@@ -2590,6 +2847,10 @@
     emitJumpTable(JTCase.second, JTCase.second.MBB);
   }
   SL->JTCases.clear();
+
+  for (auto &SwCase : SL->SwitchCases)
+    emitSwitchCase(SwCase, &CurBuilder->getMBB(), *CurBuilder);
+  SL->SwitchCases.clear();
 }

 void IRTranslator::finalizeFunction() {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-irtranslator-switch.ll
@@ -1313,10 +1313,8 @@
   ; CHECK: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[COPY]], [[C1]]
   ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
   ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ule), [[SUB]](s32), [[C5]]
-  ; CHECK: [[C6:%[0-9]+]]:_(s1) = G_CONSTANT i1 true
-  ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[C6]]
-  ; CHECK: G_BRCOND [[XOR]](s1), %bb.4
-  ; CHECK: G_BR %bb.2
+  ; CHECK: G_BRCOND [[ICMP1]](s1), %bb.2
+  ; CHECK: G_BR %bb.4
   ; CHECK: bb.2.sw.bb:
   ; CHECK: successors: %bb.4(0x80000000)
   ; CHECK: [[ADD:%[0-9]+]]:_(s32) = nsw G_ADD [[COPY]], [[C3]]
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-condbr-lower-tree.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-condbr-lower-tree.ll
new file mode 100644
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-condbr-lower-tree.ll
@@ -0,0 +1,234 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -mtriple aarch64 -stop-after=irtranslator -global-isel -verify-machineinstrs %s -o - 2>&1 | FileCheck %s
+
+declare i32 @bar(...)
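+
+; The or of the two compares below should be lowered to two conditional
+; branches, one per compare, rather than a single G_BRCOND on the G_OR result.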
+define void @or_cond(i32 %X, i32 %Y, i32 %Z) nounwind { + ; CHECK-LABEL: name: or_cond + ; CHECK: bb.1.entry: + ; CHECK: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] + ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]] + ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] + ; CHECK: G_BRCOND [[ICMP2]](s1), %bb.2 + ; CHECK: G_BR %bb.4 + ; CHECK: bb.4.entry: + ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK: G_BRCOND [[ICMP3]](s1), %bb.2 + ; CHECK: G_BR %bb.3 + ; CHECK: bb.2.cond_true: + ; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK: bb.3.UnifiedReturnBlock: + ; CHECK: RET_ReallyLR +entry: + %tmp1 = icmp eq i32 %X, 0 + %tmp3 = icmp slt i32 %Y, 5 + %tmp4 = or i1 %tmp3, %tmp1 + br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock + +cond_true: + %tmp5 = tail call i32 (...) @bar( ) + ret void + +UnifiedReturnBlock: + ret void +} + +define void @and_cond(i32 %X, i32 %Y, i32 %Z) nounwind { + ; CHECK-LABEL: name: and_cond + ; CHECK: bb.1.entry: + ; CHECK: successors: %bb.4(0x40000000), %bb.3(0x40000000) + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] + ; CHECK: [[AND:%[0-9]+]]:_(s1) = G_AND [[ICMP1]], [[ICMP]] + ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] + ; CHECK: G_BRCOND [[ICMP2]](s1), %bb.4 + ; CHECK: G_BR %bb.3 + ; CHECK: bb.4.entry: + ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK: G_BRCOND [[ICMP3]](s1), %bb.2 + ; CHECK: G_BR %bb.3 + ; CHECK: bb.2.cond_true: + ; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK: bb.3.UnifiedReturnBlock: + ; CHECK: RET_ReallyLR +entry: + %tmp1 = icmp eq i32 %X, 0 + %tmp3 = icmp slt i32 %Y, 5 + %tmp4 = and i1 %tmp3, %tmp1 + br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock + +cond_true: + %tmp5 = tail call i32 (...) @bar( ) + ret void + +UnifiedReturnBlock: + ret void +} + +; Don't emit two branches for same operands. 
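+; shouldEmitAsBranches keeps this as a single G_BRCOND, since two compares of
+; the same operands are expected to fold into one comparison later.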
+define void @or_cond_same_values_cmp(i32 %X, i32 %Y, i32 %Z) nounwind { + ; CHECK-LABEL: name: or_cond_same_values_cmp + ; CHECK: bb.1.entry: + ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C]] + ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]] + ; CHECK: G_BRCOND [[OR]](s1), %bb.2 + ; CHECK: G_BR %bb.3 + ; CHECK: bb.2.cond_true: + ; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK: bb.3.UnifiedReturnBlock: + ; CHECK: RET_ReallyLR +entry: + %tmp1 = icmp eq i32 %X, 5 + %tmp3 = icmp slt i32 %X, 5 + %tmp4 = or i1 %tmp3, %tmp1 + br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock + +cond_true: + %tmp5 = tail call i32 (...) @bar( ) + ret void + +UnifiedReturnBlock: + ret void +} + +; Emit multiple branches for more than 2 cases. +define void @or_cond_multiple_cases(i32 %X, i32 %Y, i32 %Z) nounwind { + ; CHECK-LABEL: name: or_cond_multiple_cases + ; CHECK: bb.1.entry: + ; CHECK: successors: %bb.2(0x40000000), %bb.5(0x40000000) + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C]] + ; CHECK: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]] + ; CHECK: [[OR1:%[0-9]+]]:_(s1) = G_OR [[OR]], [[ICMP2]] + ; CHECK: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY]](s32), [[C]] + ; CHECK: G_BRCOND [[ICMP3]](s1), %bb.2 + ; CHECK: G_BR %bb.5 + ; CHECK: bb.5.entry: + ; CHECK: successors: %bb.2(0x40000000), %bb.4(0x40000000) + ; CHECK: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK: G_BRCOND [[ICMP4]](s1), %bb.2 + ; CHECK: G_BR %bb.4 + ; CHECK: bb.4.entry: + ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY2]](s32), [[C]] + ; CHECK: G_BRCOND [[ICMP5]](s1), %bb.2 + ; CHECK: G_BR %bb.3 + ; CHECK: bb.2.cond_true: + ; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK: bb.3.UnifiedReturnBlock: + ; CHECK: RET_ReallyLR +entry: + %tmp1 = icmp eq i32 %X, 5 + %tmp3 = icmp slt i32 %X, 5 + %tmpZ = icmp eq i32 %Z, 5 + %tmp4 = or i1 %tmp3, %tmp1 + %final = or i1 %tmp4, %tmpZ + br i1 %final, label %cond_true, label %UnifiedReturnBlock + +cond_true: + %tmp5 = tail call i32 (...) @bar( ) + ret void + +UnifiedReturnBlock: + ret void +} + +; (X != null) | (Y != null) --> (X|Y) != 0 +; Don't emit two branches. 
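+; shouldEmitAsBranches recognizes compares against null with the same
+; predicate, which later combines can fold into a single compare of (X|Y).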
+define void @or_cond_ne_null(i32 %X, i32 %Y, i32 %Z) nounwind { + ; CHECK-LABEL: name: or_cond_ne_null + ; CHECK: bb.1.entry: + ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY]](s32), [[C]] + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[COPY1]](s32), [[C]] + ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]] + ; CHECK: G_BRCOND [[OR]](s1), %bb.2 + ; CHECK: G_BR %bb.3 + ; CHECK: bb.2.cond_true: + ; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK: bb.3.UnifiedReturnBlock: + ; CHECK: RET_ReallyLR +entry: + %tmp1 = icmp ne i32 %X, 0 + %tmp3 = icmp ne i32 %Y, 0 + %tmp4 = or i1 %tmp3, %tmp1 + br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock + +cond_true: + %tmp5 = tail call i32 (...) @bar( ) + ret void + +UnifiedReturnBlock: + ret void +} + +; If the branch is unpredictable, don't add another branch +; regardless of whether they are expensive or not. + +define void @unpredictable(i32 %X, i32 %Y, i32 %Z) nounwind { + ; CHECK-LABEL: name: unpredictable + ; CHECK: bb.1.entry: + ; CHECK: successors: %bb.2(0x40000000), %bb.3(0x40000000) + ; CHECK: liveins: $w0, $w1, $w2 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $w2 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] + ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[COPY1]](s32), [[C1]] + ; CHECK: [[OR:%[0-9]+]]:_(s1) = G_OR [[ICMP1]], [[ICMP]] + ; CHECK: G_BRCOND [[OR]](s1), %bb.2 + ; CHECK: G_BR %bb.3 + ; CHECK: bb.2.cond_true: + ; CHECK: TCRETURNdi @bar, 0, csr_aarch64_aapcs, implicit $sp + ; CHECK: bb.3.UnifiedReturnBlock: + ; CHECK: RET_ReallyLR +entry: + %tmp1 = icmp eq i32 %X, 0 + %tmp3 = icmp slt i32 %Y, 5 + %tmp4 = or i1 %tmp3, %tmp1 + br i1 %tmp4, label %cond_true, label %UnifiedReturnBlock, !unpredictable !0 + +cond_true: + %tmp5 = tail call i32 (...) 
@bar( ) + ret void + +UnifiedReturnBlock: + ret void +} + +!0 = !{} diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s32.ll @@ -20,88 +20,100 @@ ; MIPS32-NEXT: sw $7, 28($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $2, 24($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB0_9 +; MIPS32-NEXT: bnez $8, $BB0_12 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.1: # %pre.PHI.1 +; MIPS32-NEXT: # %bb.1: # %entry +; MIPS32-NEXT: j $BB0_2 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB0_2: # %pre.PHI.1 ; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_4 +; MIPS32-NEXT: bnez $2, $BB0_7 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 +; MIPS32-NEXT: j $BB0_4 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.2: # %pre.PHI.1.0 +; MIPS32-NEXT: $BB0_4: # %pre.PHI.1.0 ; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_5 +; MIPS32-NEXT: bnez $2, $BB0_8 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0 +; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 +; MIPS32-NEXT: j $BB0_6 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB0_6: # %b.PHI.1.0 ; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB0_6 +; MIPS32-NEXT: j $BB0_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_4: # %b.PHI.1.1 +; MIPS32-NEXT: $BB0_7: # %b.PHI.1.1 ; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB0_6 +; MIPS32-NEXT: j $BB0_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_5: # %b.PHI.1.2 +; MIPS32-NEXT: $BB0_8: # %b.PHI.1.2 ; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: $BB0_6: # %b.PHI.1 +; MIPS32-NEXT: $BB0_9: # %b.PHI.1 ; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 32($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $3, $2, 1 ; MIPS32-NEXT: move $4, $1 ; MIPS32-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB0_8 +; MIPS32-NEXT: bnez $3, $BB0_11 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.7: # %b.PHI.1 -; MIPS32-NEXT: j $BB0_15 +; MIPS32-NEXT: # %bb.10: # %b.PHI.1 +; MIPS32-NEXT: j $BB0_19 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_8: # %b.PHI.1.end +; MIPS32-NEXT: $BB0_11: # %b.PHI.1.end ; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 48 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_9: # %pre.PHI.2 +; MIPS32-NEXT: $BB0_12: # %pre.PHI.2 ; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_11 +; MIPS32-NEXT: bnez $2, $BB0_14 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.10: # %pre.PHI.2 -; MIPS32-NEXT: j $BB0_12 +; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 +; MIPS32-NEXT: j $BB0_15 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_11: # %b.PHI.2.0 +; MIPS32-NEXT: $BB0_14: # %b.PHI.2.0 ; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 
4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB0_13 +; MIPS32-NEXT: j $BB0_16 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_12: # %b.PHI.2.1 +; MIPS32-NEXT: $BB0_15: # %b.PHI.2.1 ; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: $BB0_13: # %b.PHI.2 +; MIPS32-NEXT: $BB0_16: # %b.PHI.2 ; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 36($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $3, $2, 1 ; MIPS32-NEXT: move $4, $1 ; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB0_15 +; MIPS32-NEXT: bnez $3, $BB0_19 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end +; MIPS32-NEXT: # %bb.17: # %b.PHI.2 +; MIPS32-NEXT: j $BB0_18 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB0_18: # %b.PHI.2.end ; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 48 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_15: # %b.PHI.3 +; MIPS32-NEXT: $BB0_19: # %b.PHI.3 ; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $3, 32($sp) # 4-byte Folded Reload @@ -197,35 +209,44 @@ ; MIPS32-NEXT: sw $2, 32($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 28($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $8, 24($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $9, $BB1_9 +; MIPS32-NEXT: bnez $9, $BB1_12 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.1: # %entry +; MIPS32-NEXT: j $BB1_2 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.1: # %pre.PHI.1 +; MIPS32-NEXT: $BB1_2: # %pre.PHI.1 ; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_4 +; MIPS32-NEXT: bnez $2, $BB1_7 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.2: # %pre.PHI.1.0 +; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 +; MIPS32-NEXT: j $BB1_4 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB1_4: # %pre.PHI.1.0 ; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_5 +; MIPS32-NEXT: bnez $2, $BB1_8 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 +; MIPS32-NEXT: j $BB1_6 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0 +; MIPS32-NEXT: $BB1_6: # %b.PHI.1.0 ; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB1_6 +; MIPS32-NEXT: j $BB1_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_4: # %b.PHI.1.1 +; MIPS32-NEXT: $BB1_7: # %b.PHI.1.1 ; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB1_6 +; MIPS32-NEXT: j $BB1_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_5: # %b.PHI.1.2 +; MIPS32-NEXT: $BB1_8: # %b.PHI.1.2 ; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: $BB1_6: # %b.PHI.1 +; MIPS32-NEXT: $BB1_9: # %b.PHI.1 ; MIPS32-NEXT: lw $1, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 40($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $3, $2, 1 @@ -234,37 +255,37 @@ ; MIPS32-NEXT: sw $1, 16($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $4, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB1_8 +; MIPS32-NEXT: bnez $3, $BB1_11 ; MIPS32-NEXT: nop -; 
MIPS32-NEXT: # %bb.7: # %b.PHI.1 -; MIPS32-NEXT: j $BB1_15 +; MIPS32-NEXT: # %bb.10: # %b.PHI.1 +; MIPS32-NEXT: j $BB1_19 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_8: # %b.PHI.1.end +; MIPS32-NEXT: $BB1_11: # %b.PHI.1.end ; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 28($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 56 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_9: # %pre.PHI.2 +; MIPS32-NEXT: $BB1_12: # %pre.PHI.2 ; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_11 +; MIPS32-NEXT: bnez $2, $BB1_14 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.10: # %pre.PHI.2 -; MIPS32-NEXT: j $BB1_12 +; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 +; MIPS32-NEXT: j $BB1_15 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_11: # %b.PHI.2.0 +; MIPS32-NEXT: $BB1_14: # %b.PHI.2.0 ; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB1_13 +; MIPS32-NEXT: j $BB1_16 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_12: # %b.PHI.2.1 +; MIPS32-NEXT: $BB1_15: # %b.PHI.2.1 ; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: $BB1_13: # %b.PHI.2 +; MIPS32-NEXT: $BB1_16: # %b.PHI.2 ; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $3, $2, 1 @@ -273,16 +294,19 @@ ; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $4, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $5, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB1_15 +; MIPS32-NEXT: bnez $3, $BB1_19 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.17: # %b.PHI.2 +; MIPS32-NEXT: j $BB1_18 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end +; MIPS32-NEXT: $BB1_18: # %b.PHI.2.end ; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 28($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 56 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_15: # %b.PHI.3 +; MIPS32-NEXT: $BB1_19: # %b.PHI.3 ; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $3, 40($sp) # 4-byte Folded Reload @@ -375,88 +399,100 @@ ; MIPS32-NEXT: sw $7, 28($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $2, 24($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB2_9 +; MIPS32-NEXT: bnez $8, $BB2_12 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.1: # %pre.PHI.1 +; MIPS32-NEXT: # %bb.1: # %entry +; MIPS32-NEXT: j $BB2_2 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB2_2: # %pre.PHI.1 ; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_4 +; MIPS32-NEXT: bnez $2, $BB2_7 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 +; MIPS32-NEXT: j $BB2_4 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.2: # %pre.PHI.1.0 +; MIPS32-NEXT: $BB2_4: # %pre.PHI.1.0 ; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_5 +; MIPS32-NEXT: bnez $2, $BB2_8 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0 +; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 +; MIPS32-NEXT: j $BB2_6 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB2_6: # %b.PHI.1.0 ; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 
16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB2_6 +; MIPS32-NEXT: j $BB2_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_4: # %b.PHI.1.1 +; MIPS32-NEXT: $BB2_7: # %b.PHI.1.1 ; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB2_6 +; MIPS32-NEXT: j $BB2_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_5: # %b.PHI.1.2 +; MIPS32-NEXT: $BB2_8: # %b.PHI.1.2 ; MIPS32-NEXT: lw $1, 24($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: $BB2_6: # %b.PHI.1 +; MIPS32-NEXT: $BB2_9: # %b.PHI.1 ; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 32($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $3, $2, 1 ; MIPS32-NEXT: move $4, $1 ; MIPS32-NEXT: sw $1, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB2_8 +; MIPS32-NEXT: bnez $3, $BB2_11 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.7: # %b.PHI.1 -; MIPS32-NEXT: j $BB2_15 +; MIPS32-NEXT: # %bb.10: # %b.PHI.1 +; MIPS32-NEXT: j $BB2_19 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_8: # %b.PHI.1.end +; MIPS32-NEXT: $BB2_11: # %b.PHI.1.end ; MIPS32-NEXT: lw $1, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 48 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_9: # %pre.PHI.2 +; MIPS32-NEXT: $BB2_12: # %pre.PHI.2 ; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_11 +; MIPS32-NEXT: bnez $2, $BB2_14 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.10: # %pre.PHI.2 -; MIPS32-NEXT: j $BB2_12 +; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 +; MIPS32-NEXT: j $BB2_15 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_11: # %b.PHI.2.0 +; MIPS32-NEXT: $BB2_14: # %b.PHI.2.0 ; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB2_13 +; MIPS32-NEXT: j $BB2_16 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_12: # %b.PHI.2.1 +; MIPS32-NEXT: $BB2_15: # %b.PHI.2.1 ; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: sw $2, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: $BB2_13: # %b.PHI.2 +; MIPS32-NEXT: $BB2_16: # %b.PHI.2 ; MIPS32-NEXT: lw $1, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 36($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $3, $2, 1 ; MIPS32-NEXT: move $4, $1 ; MIPS32-NEXT: sw $1, 0($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $4, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $3, $BB2_15 +; MIPS32-NEXT: bnez $3, $BB2_19 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.17: # %b.PHI.2 +; MIPS32-NEXT: j $BB2_18 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end +; MIPS32-NEXT: $BB2_18: # %b.PHI.2.end ; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) ; MIPS32-NEXT: addiu $sp, $sp, 48 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_15: # %b.PHI.3 +; MIPS32-NEXT: $BB2_19: # %b.PHI.3 ; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $3, 32($sp) # 4-byte Folded Reload @@ -553,35 +589,44 @@ ; MIPS32-NEXT: sw $2, 32($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 28($sp) # 4-byte Folded Spill ; MIPS32-NEXT: swc1 $f0, 24($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB3_9 +; 
MIPS32-NEXT: bnez $8, $BB3_12 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.1: # %entry +; MIPS32-NEXT: j $BB3_2 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.1: # %pre.PHI.1 +; MIPS32-NEXT: $BB3_2: # %pre.PHI.1 ; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_4 +; MIPS32-NEXT: bnez $2, $BB3_7 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.2: # %pre.PHI.1.0 +; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 +; MIPS32-NEXT: j $BB3_4 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB3_4: # %pre.PHI.1.0 ; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_5 +; MIPS32-NEXT: bnez $2, $BB3_8 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0 +; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 +; MIPS32-NEXT: j $BB3_6 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB3_6: # %b.PHI.1.0 ; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lwc1 $f0, 0($1) ; MIPS32-NEXT: swc1 $f0, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB3_6 +; MIPS32-NEXT: j $BB3_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_4: # %b.PHI.1.1 +; MIPS32-NEXT: $BB3_7: # %b.PHI.1.1 ; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lwc1 $f0, 0($1) ; MIPS32-NEXT: swc1 $f0, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB3_6 +; MIPS32-NEXT: j $BB3_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_5: # %b.PHI.1.2 +; MIPS32-NEXT: $BB3_8: # %b.PHI.1.2 ; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lwc1 $f0, 0($1) ; MIPS32-NEXT: swc1 $f0, 20($sp) # 4-byte Folded Spill -; MIPS32-NEXT: $BB3_6: # %b.PHI.1 +; MIPS32-NEXT: $BB3_9: # %b.PHI.1 ; MIPS32-NEXT: lwc1 $f0, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 @@ -590,37 +635,37 @@ ; MIPS32-NEXT: swc1 $f0, 16($sp) # 4-byte Folded Spill ; MIPS32-NEXT: swc1 $f1, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: swc1 $f2, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB3_8 +; MIPS32-NEXT: bnez $2, $BB3_11 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.7: # %b.PHI.1 -; MIPS32-NEXT: j $BB3_15 +; MIPS32-NEXT: # %bb.10: # %b.PHI.1 +; MIPS32-NEXT: j $BB3_19 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_8: # %b.PHI.1.end +; MIPS32-NEXT: $BB3_11: # %b.PHI.1.end ; MIPS32-NEXT: lwc1 $f0, 16($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload ; MIPS32-NEXT: swc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 56 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_9: # %pre.PHI.2 +; MIPS32-NEXT: $BB3_12: # %pre.PHI.2 ; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_11 +; MIPS32-NEXT: bnez $2, $BB3_14 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.10: # %pre.PHI.2 -; MIPS32-NEXT: j $BB3_12 +; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 +; MIPS32-NEXT: j $BB3_15 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_11: # %b.PHI.2.0 +; MIPS32-NEXT: $BB3_14: # %b.PHI.2.0 ; MIPS32-NEXT: lw $1, 36($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lwc1 $f0, 0($1) ; MIPS32-NEXT: swc1 $f0, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB3_13 +; MIPS32-NEXT: j $BB3_16 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_12: # %b.PHI.2.1 +; MIPS32-NEXT: $BB3_15: # %b.PHI.2.1 ; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lwc1 $f0, 0($1) ; MIPS32-NEXT: swc1 $f0, 4($sp) # 4-byte Folded Spill -; MIPS32-NEXT: $BB3_13: # %b.PHI.2 +; MIPS32-NEXT: $BB3_16: # %b.PHI.2 ; MIPS32-NEXT: lwc1 $f0, 4($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; 
MIPS32-NEXT: andi $2, $1, 1 @@ -629,16 +674,19 @@ ; MIPS32-NEXT: swc1 $f0, 0($sp) # 4-byte Folded Spill ; MIPS32-NEXT: swc1 $f1, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: swc1 $f2, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB3_15 +; MIPS32-NEXT: bnez $2, $BB3_19 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.17: # %b.PHI.2 +; MIPS32-NEXT: j $BB3_18 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end +; MIPS32-NEXT: $BB3_18: # %b.PHI.2.end ; MIPS32-NEXT: lwc1 $f0, 0($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $1, 28($sp) # 4-byte Folded Reload ; MIPS32-NEXT: swc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 56 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_15: # %b.PHI.3 +; MIPS32-NEXT: $BB3_19: # %b.PHI.3 ; MIPS32-NEXT: lwc1 $f0, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lwc1 $f1, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s64.ll b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s64.ll --- a/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s64.ll +++ b/llvm/test/CodeGen/Mips/GlobalISel/llvm-ir/long_ambiguous_chain_s64.ll @@ -20,88 +20,100 @@ ; MIPS32-NEXT: sw $7, 52($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $2, 48($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 44($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB0_9 +; MIPS32-NEXT: bnez $8, $BB0_12 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.1: # %pre.PHI.1 +; MIPS32-NEXT: # %bb.1: # %entry +; MIPS32-NEXT: j $BB0_2 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB0_2: # %pre.PHI.1 ; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_4 +; MIPS32-NEXT: bnez $2, $BB0_7 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 +; MIPS32-NEXT: j $BB0_4 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.2: # %pre.PHI.1.0 +; MIPS32-NEXT: $BB0_4: # %pre.PHI.1.0 ; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_5 +; MIPS32-NEXT: bnez $2, $BB0_8 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0 +; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 +; MIPS32-NEXT: j $BB0_6 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB0_6: # %b.PHI.1.0 ; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill -; MIPS32-NEXT: j $BB0_6 +; MIPS32-NEXT: j $BB0_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_4: # %b.PHI.1.1 +; MIPS32-NEXT: $BB0_7: # %b.PHI.1.1 ; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill -; MIPS32-NEXT: j $BB0_6 +; MIPS32-NEXT: j $BB0_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_5: # %b.PHI.1.2 +; MIPS32-NEXT: $BB0_8: # %b.PHI.1.2 ; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill -; MIPS32-NEXT: $BB0_6: # %b.PHI.1 +; MIPS32-NEXT: $BB0_9: # %b.PHI.1 ; MIPS32-NEXT: ldc1 $f0, 32($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 ; MIPS32-NEXT: mov.d $f2, $f0 ; MIPS32-NEXT: sdc1 $f0, 24($sp) # 8-byte Folded Spill ; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB0_8 +; MIPS32-NEXT: bnez $2, $BB0_11 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.7: # %b.PHI.1 -; MIPS32-NEXT: j $BB0_15 +; MIPS32-NEXT: # %bb.10: # %b.PHI.1 +; MIPS32-NEXT: j $BB0_19 ; 
MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_8: # %b.PHI.1.end +; MIPS32-NEXT: $BB0_11: # %b.PHI.1.end ; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 72 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_9: # %pre.PHI.2 +; MIPS32-NEXT: $BB0_12: # %pre.PHI.2 ; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB0_11 +; MIPS32-NEXT: bnez $2, $BB0_14 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.10: # %pre.PHI.2 -; MIPS32-NEXT: j $BB0_12 +; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 +; MIPS32-NEXT: j $BB0_15 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_11: # %b.PHI.2.0 +; MIPS32-NEXT: $BB0_14: # %b.PHI.2.0 ; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill -; MIPS32-NEXT: j $BB0_13 +; MIPS32-NEXT: j $BB0_16 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_12: # %b.PHI.2.1 +; MIPS32-NEXT: $BB0_15: # %b.PHI.2.1 ; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill -; MIPS32-NEXT: $BB0_13: # %b.PHI.2 +; MIPS32-NEXT: $BB0_16: # %b.PHI.2 ; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 ; MIPS32-NEXT: mov.d $f2, $f0 ; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill ; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB0_15 +; MIPS32-NEXT: bnez $2, $BB0_19 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end +; MIPS32-NEXT: # %bb.17: # %b.PHI.2 +; MIPS32-NEXT: j $BB0_18 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB0_18: # %b.PHI.2.end ; MIPS32-NEXT: ldc1 $f0, 0($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 72 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB0_15: # %b.PHI.3 +; MIPS32-NEXT: $BB0_19: # %b.PHI.3 ; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload ; MIPS32-NEXT: ldc1 $f2, 16($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload @@ -197,41 +209,50 @@ ; MIPS32-NEXT: sw $2, 56($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 52($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $8, 48($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $9, $BB1_9 +; MIPS32-NEXT: bnez $9, $BB1_12 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.1: # %entry +; MIPS32-NEXT: j $BB1_2 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.1: # %pre.PHI.1 +; MIPS32-NEXT: $BB1_2: # %pre.PHI.1 ; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_4 +; MIPS32-NEXT: bnez $2, $BB1_7 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 +; MIPS32-NEXT: j $BB1_4 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.2: # %pre.PHI.1.0 +; MIPS32-NEXT: $BB1_4: # %pre.PHI.1.0 ; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_5 +; MIPS32-NEXT: bnez $2, $BB1_8 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0 +; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 +; MIPS32-NEXT: j $BB1_6 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB1_6: # %b.PHI.1.0 ; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: lw $3, 4($1) ; MIPS32-NEXT: sw $2, 44($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 40($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j 
$BB1_6 +; MIPS32-NEXT: j $BB1_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_4: # %b.PHI.1.1 +; MIPS32-NEXT: $BB1_7: # %b.PHI.1.1 ; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: lw $3, 4($1) ; MIPS32-NEXT: sw $2, 44($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 40($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB1_6 +; MIPS32-NEXT: j $BB1_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_5: # %b.PHI.1.2 +; MIPS32-NEXT: $BB1_8: # %b.PHI.1.2 ; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: lw $3, 4($1) ; MIPS32-NEXT: sw $2, 44($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 40($sp) # 4-byte Folded Spill -; MIPS32-NEXT: $BB1_6: # %b.PHI.1 +; MIPS32-NEXT: $BB1_9: # %b.PHI.1 ; MIPS32-NEXT: lw $1, 40($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $3, 64($sp) # 4-byte Folded Reload @@ -246,12 +267,12 @@ ; MIPS32-NEXT: sw $6, 24($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $7, 20($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $8, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $4, $BB1_8 +; MIPS32-NEXT: bnez $4, $BB1_11 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.7: # %b.PHI.1 -; MIPS32-NEXT: j $BB1_15 +; MIPS32-NEXT: # %bb.10: # %b.PHI.1 +; MIPS32-NEXT: j $BB1_19 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_8: # %b.PHI.1.end +; MIPS32-NEXT: $BB1_11: # %b.PHI.1.end ; MIPS32-NEXT: lw $1, 32($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) @@ -260,29 +281,29 @@ ; MIPS32-NEXT: addiu $sp, $sp, 80 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_9: # %pre.PHI.2 +; MIPS32-NEXT: $BB1_12: # %pre.PHI.2 ; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB1_11 +; MIPS32-NEXT: bnez $2, $BB1_14 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.10: # %pre.PHI.2 -; MIPS32-NEXT: j $BB1_12 +; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 +; MIPS32-NEXT: j $BB1_15 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_11: # %b.PHI.2.0 +; MIPS32-NEXT: $BB1_14: # %b.PHI.2.0 ; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: lw $3, 4($1) ; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: j $BB1_13 +; MIPS32-NEXT: j $BB1_16 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_12: # %b.PHI.2.1 +; MIPS32-NEXT: $BB1_15: # %b.PHI.2.1 ; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 0($1) ; MIPS32-NEXT: lw $3, 4($1) ; MIPS32-NEXT: sw $2, 12($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 8($sp) # 4-byte Folded Spill -; MIPS32-NEXT: $BB1_13: # %b.PHI.2 +; MIPS32-NEXT: $BB1_16: # %b.PHI.2 ; MIPS32-NEXT: lw $1, 8($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 12($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $3, 68($sp) # 4-byte Folded Reload @@ -297,9 +318,12 @@ ; MIPS32-NEXT: sw $6, 24($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $7, 20($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $8, 16($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $4, $BB1_15 +; MIPS32-NEXT: bnez $4, $BB1_19 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end +; MIPS32-NEXT: # %bb.17: # %b.PHI.2 +; MIPS32-NEXT: j $BB1_18 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB1_18: # %b.PHI.2.end ; MIPS32-NEXT: lw $1, 0($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sw $1, 0($2) @@ -308,7 +332,7 @@ ; MIPS32-NEXT: addiu $sp, $sp, 80 ; MIPS32-NEXT: jr $ra ; 
MIPS32-NEXT: nop -; MIPS32-NEXT: $BB1_15: # %b.PHI.3 +; MIPS32-NEXT: $BB1_19: # %b.PHI.3 ; MIPS32-NEXT: lw $1, 16($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $2, 20($sp) # 4-byte Folded Reload ; MIPS32-NEXT: lw $3, 24($sp) # 4-byte Folded Reload @@ -408,88 +432,100 @@ ; MIPS32-NEXT: sw $7, 52($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $2, 48($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 44($sp) # 4-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB2_9 +; MIPS32-NEXT: bnez $8, $BB2_12 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.1: # %entry +; MIPS32-NEXT: j $BB2_2 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.1: # %pre.PHI.1 +; MIPS32-NEXT: $BB2_2: # %pre.PHI.1 ; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_4 +; MIPS32-NEXT: bnez $2, $BB2_7 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 +; MIPS32-NEXT: j $BB2_4 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.2: # %pre.PHI.1.0 +; MIPS32-NEXT: $BB2_4: # %pre.PHI.1.0 ; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_5 +; MIPS32-NEXT: bnez $2, $BB2_8 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 +; MIPS32-NEXT: j $BB2_6 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0 +; MIPS32-NEXT: $BB2_6: # %b.PHI.1.0 ; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill -; MIPS32-NEXT: j $BB2_6 +; MIPS32-NEXT: j $BB2_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_4: # %b.PHI.1.1 +; MIPS32-NEXT: $BB2_7: # %b.PHI.1.1 ; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill -; MIPS32-NEXT: j $BB2_6 +; MIPS32-NEXT: j $BB2_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_5: # %b.PHI.1.2 +; MIPS32-NEXT: $BB2_8: # %b.PHI.1.2 ; MIPS32-NEXT: lw $1, 48($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill -; MIPS32-NEXT: $BB2_6: # %b.PHI.1 +; MIPS32-NEXT: $BB2_9: # %b.PHI.1 ; MIPS32-NEXT: ldc1 $f0, 32($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 ; MIPS32-NEXT: mov.d $f2, $f0 ; MIPS32-NEXT: sdc1 $f0, 24($sp) # 8-byte Folded Spill ; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB2_8 +; MIPS32-NEXT: bnez $2, $BB2_11 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.7: # %b.PHI.1 -; MIPS32-NEXT: j $BB2_15 +; MIPS32-NEXT: # %bb.10: # %b.PHI.1 +; MIPS32-NEXT: j $BB2_19 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_8: # %b.PHI.1.end +; MIPS32-NEXT: $BB2_11: # %b.PHI.1.end ; MIPS32-NEXT: ldc1 $f0, 24($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 72 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_9: # %pre.PHI.2 +; MIPS32-NEXT: $BB2_12: # %pre.PHI.2 ; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB2_11 +; MIPS32-NEXT: bnez $2, $BB2_14 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.10: # %pre.PHI.2 -; MIPS32-NEXT: j $BB2_12 +; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 +; MIPS32-NEXT: j $BB2_15 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_11: # %b.PHI.2.0 +; MIPS32-NEXT: $BB2_14: # %b.PHI.2.0 ; MIPS32-NEXT: lw $1, 52($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill -; MIPS32-NEXT: j $BB2_13 +; MIPS32-NEXT: j $BB2_16 ; 
MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_12: # %b.PHI.2.1 +; MIPS32-NEXT: $BB2_15: # %b.PHI.2.1 ; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill -; MIPS32-NEXT: $BB2_13: # %b.PHI.2 +; MIPS32-NEXT: $BB2_16: # %b.PHI.2 ; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 ; MIPS32-NEXT: mov.d $f2, $f0 ; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill ; MIPS32-NEXT: sdc1 $f2, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB2_15 +; MIPS32-NEXT: bnez $2, $BB2_19 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.17: # %b.PHI.2 +; MIPS32-NEXT: j $BB2_18 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end +; MIPS32-NEXT: $BB2_18: # %b.PHI.2.end ; MIPS32-NEXT: ldc1 $f0, 0($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 44($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 72 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB2_15: # %b.PHI.3 +; MIPS32-NEXT: $BB2_19: # %b.PHI.3 ; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload ; MIPS32-NEXT: ldc1 $f2, 16($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 56($sp) # 4-byte Folded Reload @@ -588,35 +624,44 @@ ; MIPS32-NEXT: sw $2, 64($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sw $3, 60($sp) # 4-byte Folded Spill ; MIPS32-NEXT: sdc1 $f0, 48($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $8, $BB3_9 +; MIPS32-NEXT: bnez $8, $BB3_12 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.1: # %entry +; MIPS32-NEXT: j $BB3_2 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.1: # %pre.PHI.1 +; MIPS32-NEXT: $BB3_2: # %pre.PHI.1 ; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_4 +; MIPS32-NEXT: bnez $2, $BB3_7 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.2: # %pre.PHI.1.0 +; MIPS32-NEXT: # %bb.3: # %pre.PHI.1 +; MIPS32-NEXT: j $BB3_4 +; MIPS32-NEXT: nop +; MIPS32-NEXT: $BB3_4: # %pre.PHI.1.0 ; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_5 +; MIPS32-NEXT: bnez $2, $BB3_8 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.5: # %pre.PHI.1.0 +; MIPS32-NEXT: j $BB3_6 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.3: # %b.PHI.1.0 +; MIPS32-NEXT: $BB3_6: # %b.PHI.1.0 ; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 40($sp) # 8-byte Folded Spill -; MIPS32-NEXT: j $BB3_6 +; MIPS32-NEXT: j $BB3_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_4: # %b.PHI.1.1 +; MIPS32-NEXT: $BB3_7: # %b.PHI.1.1 ; MIPS32-NEXT: lw $1, 84($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 40($sp) # 8-byte Folded Spill -; MIPS32-NEXT: j $BB3_6 +; MIPS32-NEXT: j $BB3_9 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_5: # %b.PHI.1.2 +; MIPS32-NEXT: $BB3_8: # %b.PHI.1.2 ; MIPS32-NEXT: lw $1, 64($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 40($sp) # 8-byte Folded Spill -; MIPS32-NEXT: $BB3_6: # %b.PHI.1 +; MIPS32-NEXT: $BB3_9: # %b.PHI.1 ; MIPS32-NEXT: ldc1 $f0, 40($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 @@ -625,37 +670,37 @@ ; MIPS32-NEXT: sdc1 $f0, 32($sp) # 8-byte Folded Spill ; MIPS32-NEXT: sdc1 $f2, 24($sp) # 8-byte Folded Spill ; MIPS32-NEXT: sdc1 $f4, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB3_8 +; MIPS32-NEXT: bnez $2, $BB3_11 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # 
%bb.7: # %b.PHI.1 -; MIPS32-NEXT: j $BB3_15 +; MIPS32-NEXT: # %bb.10: # %b.PHI.1 +; MIPS32-NEXT: j $BB3_19 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_8: # %b.PHI.1.end +; MIPS32-NEXT: $BB3_11: # %b.PHI.1.end ; MIPS32-NEXT: ldc1 $f0, 32($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 88 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_9: # %pre.PHI.2 +; MIPS32-NEXT: $BB3_12: # %pre.PHI.2 ; MIPS32-NEXT: lw $1, 80($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 -; MIPS32-NEXT: bnez $2, $BB3_11 +; MIPS32-NEXT: bnez $2, $BB3_14 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.10: # %pre.PHI.2 -; MIPS32-NEXT: j $BB3_12 +; MIPS32-NEXT: # %bb.13: # %pre.PHI.2 +; MIPS32-NEXT: j $BB3_15 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_11: # %b.PHI.2.0 +; MIPS32-NEXT: $BB3_14: # %b.PHI.2.0 ; MIPS32-NEXT: lw $1, 68($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill -; MIPS32-NEXT: j $BB3_13 +; MIPS32-NEXT: j $BB3_16 ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_12: # %b.PHI.2.1 +; MIPS32-NEXT: $BB3_15: # %b.PHI.2.1 ; MIPS32-NEXT: lw $1, 84($sp) # 4-byte Folded Reload ; MIPS32-NEXT: ldc1 $f0, 0($1) ; MIPS32-NEXT: sdc1 $f0, 8($sp) # 8-byte Folded Spill -; MIPS32-NEXT: $BB3_13: # %b.PHI.2 +; MIPS32-NEXT: $BB3_16: # %b.PHI.2 ; MIPS32-NEXT: ldc1 $f0, 8($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 76($sp) # 4-byte Folded Reload ; MIPS32-NEXT: andi $2, $1, 1 @@ -664,16 +709,19 @@ ; MIPS32-NEXT: sdc1 $f0, 0($sp) # 8-byte Folded Spill ; MIPS32-NEXT: sdc1 $f2, 24($sp) # 8-byte Folded Spill ; MIPS32-NEXT: sdc1 $f4, 16($sp) # 8-byte Folded Spill -; MIPS32-NEXT: bnez $2, $BB3_15 +; MIPS32-NEXT: bnez $2, $BB3_19 +; MIPS32-NEXT: nop +; MIPS32-NEXT: # %bb.17: # %b.PHI.2 +; MIPS32-NEXT: j $BB3_18 ; MIPS32-NEXT: nop -; MIPS32-NEXT: # %bb.14: # %b.PHI.2.end +; MIPS32-NEXT: $BB3_18: # %b.PHI.2.end ; MIPS32-NEXT: ldc1 $f0, 0($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 60($sp) # 4-byte Folded Reload ; MIPS32-NEXT: sdc1 $f0, 0($1) ; MIPS32-NEXT: addiu $sp, $sp, 88 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: nop -; MIPS32-NEXT: $BB3_15: # %b.PHI.3 +; MIPS32-NEXT: $BB3_19: # %b.PHI.3 ; MIPS32-NEXT: ldc1 $f0, 16($sp) # 8-byte Folded Reload ; MIPS32-NEXT: ldc1 $f2, 24($sp) # 8-byte Folded Reload ; MIPS32-NEXT: lw $1, 72($sp) # 4-byte Folded Reload