Index: lib/Target/RISCV/RISCVISelLowering.cpp =================================================================== --- lib/Target/RISCV/RISCVISelLowering.cpp +++ lib/Target/RISCV/RISCVISelLowering.cpp @@ -17,6 +17,7 @@ #include "RISCVRegisterInfo.h" #include "RISCVSubtarget.h" #include "RISCVTargetMachine.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -787,10 +788,21 @@ return BB; } +static bool isSelectPseudo(MachineInstr &MI) { + switch (MI.getOpcode()) { + default: + return false; + case RISCV::Select_GPR_Using_CC_GPR: + case RISCV::Select_FPR32_Using_CC_GPR: + case RISCV::Select_FPR64_Using_CC_GPR: + return true; + } +} + static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI, MachineBasicBlock *BB) { - // To "insert" a SELECT instruction, we actually have to insert the triangle - // control-flow pattern. The incoming instruction knows the destination vreg + // To "insert" Select_* instructions, we actually have to insert the triangle + // control-flow pattern. The incoming instructions know the destination vreg // to set, the condition code register to branch on, the true/false values to // select between, and the condcode to use to select the appropriate branch. // @@ -800,6 +812,54 @@ // | IfFalseMBB // | / // TailMBB + // + // When we find a sequence of selects we attempt to optimize their emission + // by sharing the control flow. Currently we only handle cases where we have + // multiple selects with the exact same condition (same LHS, RHS and CC). + // The selects may be interleaved with other instructions if the other + // instructions meet some requirements we deem safe: + // - They are debug instructions. Otherwise, + // - They do not have side-effects, do not access memory and their inputs do + // not depend on the results of the select pseudo-instructions. + // The TrueV/FalseV operands of the selects cannot depend on the result of + // previous selects in the sequence. + // These conditions could be further relaxed. See the X86 target for a + // related approach and more information. + unsigned LHS = MI.getOperand(1).getReg(); + unsigned RHS = MI.getOperand(2).getReg(); + auto CC = static_cast(MI.getOperand(3).getImm()); + + SmallVector SelectDebugValues; + SmallSet SelectDests; + SelectDests.insert(MI.getOperand(0).getReg()); + + MachineInstr *LastSelectPseudo = &MI; + + for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI); + SequenceMBBI != E; SequenceMBBI = std::next(SequenceMBBI)) { + if (SequenceMBBI->isDebugInstr()) + continue; + else if (isSelectPseudo(*SequenceMBBI)) { + if (SequenceMBBI->getOperand(1).getReg() != LHS || + SequenceMBBI->getOperand(2).getReg() != RHS || + SequenceMBBI->getOperand(3).getImm() != CC || + SelectDests.count(SequenceMBBI->getOperand(4).getReg()) || + SelectDests.count(SequenceMBBI->getOperand(5).getReg())) + break; + LastSelectPseudo = &*SequenceMBBI; + SequenceMBBI->collectDebugValues(SelectDebugValues); + SelectDests.insert(SequenceMBBI->getOperand(0).getReg()); + } else { + if (SequenceMBBI->hasUnmodeledSideEffects() || + SequenceMBBI->mayLoadOrStore()) + break; + if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) { + return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg()); + })) + break; + } + } + const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); DebugLoc DL = MI.getDebugLoc(); @@ -812,20 +872,23 @@ F->insert(I, IfFalseMBB); F->insert(I, TailMBB); - // Move all remaining instructions to TailMBB. - TailMBB->splice(TailMBB->begin(), HeadMBB, std::next(MI.getIterator()), - HeadMBB->end()); + + // Transfer debug instructions regarding the selects to TailMBB. + for (MachineInstr *DebugInstr : SelectDebugValues) { + TailMBB->push_back(DebugInstr->removeFromParent()); + } + + // Move all instructions after the sequence to TailMBB. + TailMBB->splice(TailMBB->end(), HeadMBB, + std::next(LastSelectPseudo->getIterator()), HeadMBB->end()); // Update machine-CFG edges by transferring all successors of the current - // block to the new block which will contain the Phi node for the select. + // block to the new block which will contain the Phi nodes for the selects. TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB); // Set the successors for HeadMBB. HeadMBB->addSuccessor(IfFalseMBB); HeadMBB->addSuccessor(TailMBB); // Insert appropriate branch. - unsigned LHS = MI.getOperand(1).getReg(); - unsigned RHS = MI.getOperand(2).getReg(); - auto CC = static_cast(MI.getOperand(3).getImm()); unsigned Opcode = getBranchOpcodeForIntCondCode(CC); BuildMI(HeadMBB, DL, TII.get(Opcode)) @@ -836,15 +899,24 @@ // IfFalseMBB just falls through to TailMBB. IfFalseMBB->addSuccessor(TailMBB); - // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] - BuildMI(*TailMBB, TailMBB->begin(), DL, TII.get(RISCV::PHI), - MI.getOperand(0).getReg()) - .addReg(MI.getOperand(4).getReg()) - .addMBB(HeadMBB) - .addReg(MI.getOperand(5).getReg()) - .addMBB(IfFalseMBB); + // Create PHIs for all of the select pseudo-instructions that were inserted. + auto SelectMBBI = MI.getIterator(); + auto SelectEnd = std::next(LastSelectPseudo->getIterator()); + while (SelectMBBI != SelectEnd) { + auto Next = std::next(SelectMBBI); + if (isSelectPseudo(*SelectMBBI)) { + // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ] + BuildMI(*TailMBB, TailMBB->begin(), SelectMBBI->getDebugLoc(), + TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg()) + .addReg(SelectMBBI->getOperand(4).getReg()) + .addMBB(HeadMBB) + .addReg(SelectMBBI->getOperand(5).getReg()) + .addMBB(IfFalseMBB); + SelectMBBI->eraseFromParent(); + } + SelectMBBI = Next; + } - MI.eraseFromParent(); // The pseudo instruction is gone now. return TailMBB; } Index: test/CodeGen/RISCV/atomic-rmw.ll =================================================================== --- test/CodeGen/RISCV/atomic-rmw.ll +++ test/CodeGen/RISCV/atomic-rmw.ll @@ -14574,24 +14574,19 @@ ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB200_1 Depth=1 ; RV32I-NEXT: slt a0, s0, a1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB200_4 -; RV32I-NEXT: j .LBB200_5 +; RV32I-NEXT: j .LBB200_4 ; RV32I-NEXT: .LBB200_3: # in Loop: Header=BB200_1 Depth=1 ; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB200_5 ; RV32I-NEXT: .LBB200_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB200_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB200_5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB200_1 Depth=1 +; RV32I-NEXT: sw a2, 0(sp) ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB200_7 -; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: bnez a0, .LBB200_6 +; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB200_1 Depth=1 ; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB200_7: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: .LBB200_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB200_1 Depth=1 ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 @@ -14602,7 +14597,7 @@ ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) ; RV32I-NEXT: beqz a0, .LBB200_1 -; RV32I-NEXT: # %bb.8: # %atomicrmw.end +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -14632,24 +14627,19 @@ ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB200_1 Depth=1 ; RV32IA-NEXT: slt a0, s0, a1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB200_4 -; RV32IA-NEXT: j .LBB200_5 +; RV32IA-NEXT: j .LBB200_4 ; RV32IA-NEXT: .LBB200_3: # in Loop: Header=BB200_1 Depth=1 ; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB200_5 ; RV32IA-NEXT: .LBB200_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB200_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: .LBB200_5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB200_1 Depth=1 +; RV32IA-NEXT: sw a2, 0(sp) ; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB200_7 -; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: bnez a0, .LBB200_6 +; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB200_1 Depth=1 ; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB200_7: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: .LBB200_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB200_1 Depth=1 ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 @@ -14660,7 +14650,7 @@ ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB200_1 -; RV32IA-NEXT: # %bb.8: # %atomicrmw.end +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -14735,24 +14725,19 @@ ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB201_1 Depth=1 ; RV32I-NEXT: slt a0, s0, a1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB201_4 -; RV32I-NEXT: j .LBB201_5 +; RV32I-NEXT: j .LBB201_4 ; RV32I-NEXT: .LBB201_3: # in Loop: Header=BB201_1 Depth=1 ; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB201_5 ; RV32I-NEXT: .LBB201_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB201_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB201_5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB201_1 Depth=1 +; RV32I-NEXT: sw a2, 0(sp) ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB201_7 -; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: bnez a0, .LBB201_6 +; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB201_1 Depth=1 ; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB201_7: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: .LBB201_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB201_1 Depth=1 ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 @@ -14763,7 +14748,7 @@ ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) ; RV32I-NEXT: beqz a0, .LBB201_1 -; RV32I-NEXT: # %bb.8: # %atomicrmw.end +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -14793,24 +14778,19 @@ ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB201_1 Depth=1 ; RV32IA-NEXT: slt a0, s0, a1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB201_4 -; RV32IA-NEXT: j .LBB201_5 +; RV32IA-NEXT: j .LBB201_4 ; RV32IA-NEXT: .LBB201_3: # in Loop: Header=BB201_1 Depth=1 ; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB201_5 ; RV32IA-NEXT: .LBB201_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB201_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: .LBB201_5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB201_1 Depth=1 +; RV32IA-NEXT: sw a2, 0(sp) ; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB201_7 -; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: bnez a0, .LBB201_6 +; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB201_1 Depth=1 ; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB201_7: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: .LBB201_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB201_1 Depth=1 ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 @@ -14821,7 +14801,7 @@ ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB201_1 -; RV32IA-NEXT: # %bb.8: # %atomicrmw.end +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -14896,24 +14876,19 @@ ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB202_1 Depth=1 ; RV32I-NEXT: slt a0, s0, a1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB202_4 -; RV32I-NEXT: j .LBB202_5 +; RV32I-NEXT: j .LBB202_4 ; RV32I-NEXT: .LBB202_3: # in Loop: Header=BB202_1 Depth=1 ; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB202_5 ; RV32I-NEXT: .LBB202_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB202_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB202_5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB202_1 Depth=1 +; RV32I-NEXT: sw a2, 0(sp) ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB202_7 -; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: bnez a0, .LBB202_6 +; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB202_1 Depth=1 ; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB202_7: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: .LBB202_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB202_1 Depth=1 ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 @@ -14924,7 +14899,7 @@ ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) ; RV32I-NEXT: beqz a0, .LBB202_1 -; RV32I-NEXT: # %bb.8: # %atomicrmw.end +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -14954,24 +14929,19 @@ ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB202_1 Depth=1 ; RV32IA-NEXT: slt a0, s0, a1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB202_4 -; RV32IA-NEXT: j .LBB202_5 +; RV32IA-NEXT: j .LBB202_4 ; RV32IA-NEXT: .LBB202_3: # in Loop: Header=BB202_1 Depth=1 ; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB202_5 ; RV32IA-NEXT: .LBB202_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB202_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: .LBB202_5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB202_1 Depth=1 +; RV32IA-NEXT: sw a2, 0(sp) ; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB202_7 -; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: bnez a0, .LBB202_6 +; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB202_1 Depth=1 ; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB202_7: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: .LBB202_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB202_1 Depth=1 ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 @@ -14982,7 +14952,7 @@ ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB202_1 -; RV32IA-NEXT: # %bb.8: # %atomicrmw.end +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -15057,24 +15027,19 @@ ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB203_1 Depth=1 ; RV32I-NEXT: slt a0, s0, a1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB203_4 -; RV32I-NEXT: j .LBB203_5 +; RV32I-NEXT: j .LBB203_4 ; RV32I-NEXT: .LBB203_3: # in Loop: Header=BB203_1 Depth=1 ; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB203_5 ; RV32I-NEXT: .LBB203_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB203_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB203_5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB203_1 Depth=1 +; RV32I-NEXT: sw a2, 0(sp) ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB203_7 -; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: bnez a0, .LBB203_6 +; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB203_1 Depth=1 ; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB203_7: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: .LBB203_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB203_1 Depth=1 ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 @@ -15085,7 +15050,7 @@ ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) ; RV32I-NEXT: beqz a0, .LBB203_1 -; RV32I-NEXT: # %bb.8: # %atomicrmw.end +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -15115,24 +15080,19 @@ ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB203_1 Depth=1 ; RV32IA-NEXT: slt a0, s0, a1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB203_4 -; RV32IA-NEXT: j .LBB203_5 +; RV32IA-NEXT: j .LBB203_4 ; RV32IA-NEXT: .LBB203_3: # in Loop: Header=BB203_1 Depth=1 ; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB203_5 ; RV32IA-NEXT: .LBB203_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB203_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: .LBB203_5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB203_1 Depth=1 +; RV32IA-NEXT: sw a2, 0(sp) ; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB203_7 -; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: bnez a0, .LBB203_6 +; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB203_1 Depth=1 ; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB203_7: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: .LBB203_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB203_1 Depth=1 ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 @@ -15143,7 +15103,7 @@ ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB203_1 -; RV32IA-NEXT: # %bb.8: # %atomicrmw.end +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -15218,24 +15178,19 @@ ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB204_1 Depth=1 ; RV32I-NEXT: slt a0, s0, a1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB204_4 -; RV32I-NEXT: j .LBB204_5 +; RV32I-NEXT: j .LBB204_4 ; RV32I-NEXT: .LBB204_3: # in Loop: Header=BB204_1 Depth=1 ; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB204_5 ; RV32I-NEXT: .LBB204_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB204_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB204_5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB204_1 Depth=1 +; RV32I-NEXT: sw a2, 0(sp) ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB204_7 -; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: bnez a0, .LBB204_6 +; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB204_1 Depth=1 ; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB204_7: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: .LBB204_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB204_1 Depth=1 ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 @@ -15246,7 +15201,7 @@ ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) ; RV32I-NEXT: beqz a0, .LBB204_1 -; RV32I-NEXT: # %bb.8: # %atomicrmw.end +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -15276,24 +15231,19 @@ ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB204_1 Depth=1 ; RV32IA-NEXT: slt a0, s0, a1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB204_4 -; RV32IA-NEXT: j .LBB204_5 +; RV32IA-NEXT: j .LBB204_4 ; RV32IA-NEXT: .LBB204_3: # in Loop: Header=BB204_1 Depth=1 ; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB204_5 ; RV32IA-NEXT: .LBB204_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB204_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: .LBB204_5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB204_1 Depth=1 +; RV32IA-NEXT: sw a2, 0(sp) ; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB204_7 -; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: bnez a0, .LBB204_6 +; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB204_1 Depth=1 ; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB204_7: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: .LBB204_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB204_1 Depth=1 ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 @@ -15304,7 +15254,7 @@ ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB204_1 -; RV32IA-NEXT: # %bb.8: # %atomicrmw.end +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -15386,19 +15336,14 @@ ; RV32I-NEXT: # in Loop: Header=BB205_1 Depth=1 ; RV32I-NEXT: xori a0, a0, 1 ; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: bnez a0, .LBB205_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB205_1 Depth=1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB205_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB205_8 -; RV32I-NEXT: # %bb.7: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB205_8: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB205_1 Depth=1 ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -15408,7 +15353,7 @@ ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) ; RV32I-NEXT: beqz a0, .LBB205_1 -; RV32I-NEXT: # %bb.9: # %atomicrmw.end +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -15445,19 +15390,14 @@ ; RV32IA-NEXT: # in Loop: Header=BB205_1 Depth=1 ; RV32IA-NEXT: xori a0, a0, 1 ; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: bnez a0, .LBB205_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB205_1 Depth=1 +; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: mv a2, s2 ; RV32IA-NEXT: .LBB205_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB205_8 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB205_1 Depth=1 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB205_8: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB205_1 Depth=1 ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -15467,7 +15407,7 @@ ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB205_1 -; RV32IA-NEXT: # %bb.9: # %atomicrmw.end +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -15549,19 +15489,14 @@ ; RV32I-NEXT: # in Loop: Header=BB206_1 Depth=1 ; RV32I-NEXT: xori a0, a0, 1 ; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: bnez a0, .LBB206_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB206_1 Depth=1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB206_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB206_8 -; RV32I-NEXT: # %bb.7: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB206_8: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB206_1 Depth=1 ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -15571,7 +15506,7 @@ ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) ; RV32I-NEXT: beqz a0, .LBB206_1 -; RV32I-NEXT: # %bb.9: # %atomicrmw.end +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -15608,19 +15543,14 @@ ; RV32IA-NEXT: # in Loop: Header=BB206_1 Depth=1 ; RV32IA-NEXT: xori a0, a0, 1 ; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: bnez a0, .LBB206_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB206_1 Depth=1 +; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: mv a2, s2 ; RV32IA-NEXT: .LBB206_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB206_8 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB206_1 Depth=1 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB206_8: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB206_1 Depth=1 ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -15630,7 +15560,7 @@ ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB206_1 -; RV32IA-NEXT: # %bb.9: # %atomicrmw.end +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -15712,19 +15642,14 @@ ; RV32I-NEXT: # in Loop: Header=BB207_1 Depth=1 ; RV32I-NEXT: xori a0, a0, 1 ; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: bnez a0, .LBB207_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB207_1 Depth=1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB207_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB207_8 -; RV32I-NEXT: # %bb.7: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB207_8: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB207_1 Depth=1 ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -15734,7 +15659,7 @@ ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) ; RV32I-NEXT: beqz a0, .LBB207_1 -; RV32I-NEXT: # %bb.9: # %atomicrmw.end +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -15771,19 +15696,14 @@ ; RV32IA-NEXT: # in Loop: Header=BB207_1 Depth=1 ; RV32IA-NEXT: xori a0, a0, 1 ; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: bnez a0, .LBB207_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB207_1 Depth=1 +; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: mv a2, s2 ; RV32IA-NEXT: .LBB207_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB207_8 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB207_1 Depth=1 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB207_8: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB207_1 Depth=1 ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -15793,7 +15713,7 @@ ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB207_1 -; RV32IA-NEXT: # %bb.9: # %atomicrmw.end +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -15875,19 +15795,14 @@ ; RV32I-NEXT: # in Loop: Header=BB208_1 Depth=1 ; RV32I-NEXT: xori a0, a0, 1 ; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: bnez a0, .LBB208_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB208_1 Depth=1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB208_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB208_8 -; RV32I-NEXT: # %bb.7: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB208_8: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB208_1 Depth=1 ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -15897,7 +15812,7 @@ ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) ; RV32I-NEXT: beqz a0, .LBB208_1 -; RV32I-NEXT: # %bb.9: # %atomicrmw.end +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -15934,19 +15849,14 @@ ; RV32IA-NEXT: # in Loop: Header=BB208_1 Depth=1 ; RV32IA-NEXT: xori a0, a0, 1 ; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: bnez a0, .LBB208_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB208_1 Depth=1 +; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: mv a2, s2 ; RV32IA-NEXT: .LBB208_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB208_8 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB208_1 Depth=1 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB208_8: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB208_1 Depth=1 ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -15956,7 +15866,7 @@ ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB208_1 -; RV32IA-NEXT: # %bb.9: # %atomicrmw.end +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -16038,19 +15948,14 @@ ; RV32I-NEXT: # in Loop: Header=BB209_1 Depth=1 ; RV32I-NEXT: xori a0, a0, 1 ; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: bnez a0, .LBB209_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB209_1 Depth=1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB209_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB209_8 -; RV32I-NEXT: # %bb.7: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB209_8: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB209_1 Depth=1 ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -16060,7 +15965,7 @@ ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) ; RV32I-NEXT: beqz a0, .LBB209_1 -; RV32I-NEXT: # %bb.9: # %atomicrmw.end +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -16097,19 +16002,14 @@ ; RV32IA-NEXT: # in Loop: Header=BB209_1 Depth=1 ; RV32IA-NEXT: xori a0, a0, 1 ; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: bnez a0, .LBB209_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB209_1 Depth=1 +; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: mv a2, s2 ; RV32IA-NEXT: .LBB209_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB209_8 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB209_1 Depth=1 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB209_8: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB209_1 Depth=1 ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -16119,7 +16019,7 @@ ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB209_1 -; RV32IA-NEXT: # %bb.9: # %atomicrmw.end +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -16194,24 +16094,19 @@ ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB210_1 Depth=1 ; RV32I-NEXT: sltu a0, s0, a1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB210_4 -; RV32I-NEXT: j .LBB210_5 +; RV32I-NEXT: j .LBB210_4 ; RV32I-NEXT: .LBB210_3: # in Loop: Header=BB210_1 Depth=1 ; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB210_5 ; RV32I-NEXT: .LBB210_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB210_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB210_5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB210_1 Depth=1 +; RV32I-NEXT: sw a2, 0(sp) ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB210_7 -; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: bnez a0, .LBB210_6 +; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB210_1 Depth=1 ; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB210_7: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: .LBB210_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB210_1 Depth=1 ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 @@ -16222,7 +16117,7 @@ ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) ; RV32I-NEXT: beqz a0, .LBB210_1 -; RV32I-NEXT: # %bb.8: # %atomicrmw.end +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -16252,24 +16147,19 @@ ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB210_1 Depth=1 ; RV32IA-NEXT: sltu a0, s0, a1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB210_4 -; RV32IA-NEXT: j .LBB210_5 +; RV32IA-NEXT: j .LBB210_4 ; RV32IA-NEXT: .LBB210_3: # in Loop: Header=BB210_1 Depth=1 ; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB210_5 ; RV32IA-NEXT: .LBB210_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB210_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: .LBB210_5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB210_1 Depth=1 +; RV32IA-NEXT: sw a2, 0(sp) ; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB210_7 -; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: bnez a0, .LBB210_6 +; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB210_1 Depth=1 ; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB210_7: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: .LBB210_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB210_1 Depth=1 ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 @@ -16280,7 +16170,7 @@ ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB210_1 -; RV32IA-NEXT: # %bb.8: # %atomicrmw.end +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -16355,24 +16245,19 @@ ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB211_1 Depth=1 ; RV32I-NEXT: sltu a0, s0, a1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB211_4 -; RV32I-NEXT: j .LBB211_5 +; RV32I-NEXT: j .LBB211_4 ; RV32I-NEXT: .LBB211_3: # in Loop: Header=BB211_1 Depth=1 ; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB211_5 ; RV32I-NEXT: .LBB211_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB211_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB211_5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB211_1 Depth=1 +; RV32I-NEXT: sw a2, 0(sp) ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB211_7 -; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: bnez a0, .LBB211_6 +; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB211_1 Depth=1 ; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB211_7: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: .LBB211_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB211_1 Depth=1 ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 @@ -16383,7 +16268,7 @@ ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) ; RV32I-NEXT: beqz a0, .LBB211_1 -; RV32I-NEXT: # %bb.8: # %atomicrmw.end +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -16413,24 +16298,19 @@ ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB211_1 Depth=1 ; RV32IA-NEXT: sltu a0, s0, a1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB211_4 -; RV32IA-NEXT: j .LBB211_5 +; RV32IA-NEXT: j .LBB211_4 ; RV32IA-NEXT: .LBB211_3: # in Loop: Header=BB211_1 Depth=1 ; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB211_5 ; RV32IA-NEXT: .LBB211_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB211_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: .LBB211_5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB211_1 Depth=1 +; RV32IA-NEXT: sw a2, 0(sp) ; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB211_7 -; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: bnez a0, .LBB211_6 +; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB211_1 Depth=1 ; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB211_7: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: .LBB211_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB211_1 Depth=1 ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 @@ -16441,7 +16321,7 @@ ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB211_1 -; RV32IA-NEXT: # %bb.8: # %atomicrmw.end +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -16516,24 +16396,19 @@ ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB212_1 Depth=1 ; RV32I-NEXT: sltu a0, s0, a1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB212_4 -; RV32I-NEXT: j .LBB212_5 +; RV32I-NEXT: j .LBB212_4 ; RV32I-NEXT: .LBB212_3: # in Loop: Header=BB212_1 Depth=1 ; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB212_5 ; RV32I-NEXT: .LBB212_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB212_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB212_5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB212_1 Depth=1 +; RV32I-NEXT: sw a2, 0(sp) ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB212_7 -; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: bnez a0, .LBB212_6 +; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB212_1 Depth=1 ; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB212_7: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: .LBB212_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB212_1 Depth=1 ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 @@ -16544,7 +16419,7 @@ ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) ; RV32I-NEXT: beqz a0, .LBB212_1 -; RV32I-NEXT: # %bb.8: # %atomicrmw.end +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -16574,24 +16449,19 @@ ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB212_1 Depth=1 ; RV32IA-NEXT: sltu a0, s0, a1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB212_4 -; RV32IA-NEXT: j .LBB212_5 +; RV32IA-NEXT: j .LBB212_4 ; RV32IA-NEXT: .LBB212_3: # in Loop: Header=BB212_1 Depth=1 ; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB212_5 ; RV32IA-NEXT: .LBB212_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB212_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: .LBB212_5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB212_1 Depth=1 +; RV32IA-NEXT: sw a2, 0(sp) ; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB212_7 -; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: bnez a0, .LBB212_6 +; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB212_1 Depth=1 ; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB212_7: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: .LBB212_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB212_1 Depth=1 ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 @@ -16602,7 +16472,7 @@ ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB212_1 -; RV32IA-NEXT: # %bb.8: # %atomicrmw.end +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -16677,24 +16547,19 @@ ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB213_1 Depth=1 ; RV32I-NEXT: sltu a0, s0, a1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB213_4 -; RV32I-NEXT: j .LBB213_5 +; RV32I-NEXT: j .LBB213_4 ; RV32I-NEXT: .LBB213_3: # in Loop: Header=BB213_1 Depth=1 ; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB213_5 ; RV32I-NEXT: .LBB213_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB213_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB213_5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB213_1 Depth=1 +; RV32I-NEXT: sw a2, 0(sp) ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB213_7 -; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: bnez a0, .LBB213_6 +; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB213_1 Depth=1 ; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB213_7: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: .LBB213_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB213_1 Depth=1 ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 @@ -16705,7 +16570,7 @@ ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) ; RV32I-NEXT: beqz a0, .LBB213_1 -; RV32I-NEXT: # %bb.8: # %atomicrmw.end +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -16735,24 +16600,19 @@ ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB213_1 Depth=1 ; RV32IA-NEXT: sltu a0, s0, a1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB213_4 -; RV32IA-NEXT: j .LBB213_5 +; RV32IA-NEXT: j .LBB213_4 ; RV32IA-NEXT: .LBB213_3: # in Loop: Header=BB213_1 Depth=1 ; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB213_5 ; RV32IA-NEXT: .LBB213_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB213_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: .LBB213_5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB213_1 Depth=1 +; RV32IA-NEXT: sw a2, 0(sp) ; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB213_7 -; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: bnez a0, .LBB213_6 +; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB213_1 Depth=1 ; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB213_7: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: .LBB213_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB213_1 Depth=1 ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 @@ -16763,7 +16623,7 @@ ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB213_1 -; RV32IA-NEXT: # %bb.8: # %atomicrmw.end +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -16838,24 +16698,19 @@ ; RV32I-NEXT: # %bb.2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB214_1 Depth=1 ; RV32I-NEXT: sltu a0, s0, a1 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: beqz a0, .LBB214_4 -; RV32I-NEXT: j .LBB214_5 +; RV32I-NEXT: j .LBB214_4 ; RV32I-NEXT: .LBB214_3: # in Loop: Header=BB214_1 Depth=1 ; RV32I-NEXT: sltu a0, s2, a2 -; RV32I-NEXT: sw a2, 0(sp) -; RV32I-NEXT: bnez a0, .LBB214_5 ; RV32I-NEXT: .LBB214_4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB214_1 Depth=1 -; RV32I-NEXT: mv a2, s2 -; RV32I-NEXT: .LBB214_5: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB214_1 Depth=1 +; RV32I-NEXT: sw a2, 0(sp) ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB214_7 -; RV32I-NEXT: # %bb.6: # %atomicrmw.start +; RV32I-NEXT: bnez a0, .LBB214_6 +; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB214_1 Depth=1 ; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB214_7: # %atomicrmw.start +; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: .LBB214_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB214_1 Depth=1 ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 @@ -16866,7 +16721,7 @@ ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) ; RV32I-NEXT: beqz a0, .LBB214_1 -; RV32I-NEXT: # %bb.8: # %atomicrmw.end +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -16896,24 +16751,19 @@ ; RV32IA-NEXT: # %bb.2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB214_1 Depth=1 ; RV32IA-NEXT: sltu a0, s0, a1 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: beqz a0, .LBB214_4 -; RV32IA-NEXT: j .LBB214_5 +; RV32IA-NEXT: j .LBB214_4 ; RV32IA-NEXT: .LBB214_3: # in Loop: Header=BB214_1 Depth=1 ; RV32IA-NEXT: sltu a0, s2, a2 -; RV32IA-NEXT: sw a2, 0(sp) -; RV32IA-NEXT: bnez a0, .LBB214_5 ; RV32IA-NEXT: .LBB214_4: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB214_1 Depth=1 -; RV32IA-NEXT: mv a2, s2 -; RV32IA-NEXT: .LBB214_5: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB214_1 Depth=1 +; RV32IA-NEXT: sw a2, 0(sp) ; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB214_7 -; RV32IA-NEXT: # %bb.6: # %atomicrmw.start +; RV32IA-NEXT: bnez a0, .LBB214_6 +; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB214_1 Depth=1 ; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB214_7: # %atomicrmw.start +; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: .LBB214_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB214_1 Depth=1 ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 @@ -16924,7 +16774,7 @@ ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB214_1 -; RV32IA-NEXT: # %bb.8: # %atomicrmw.end +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -17006,19 +16856,14 @@ ; RV32I-NEXT: # in Loop: Header=BB215_1 Depth=1 ; RV32I-NEXT: xori a0, a0, 1 ; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: bnez a0, .LBB215_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB215_1 Depth=1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB215_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB215_8 -; RV32I-NEXT: # %bb.7: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB215_8: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB215_1 Depth=1 ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -17028,7 +16873,7 @@ ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) ; RV32I-NEXT: beqz a0, .LBB215_1 -; RV32I-NEXT: # %bb.9: # %atomicrmw.end +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -17065,19 +16910,14 @@ ; RV32IA-NEXT: # in Loop: Header=BB215_1 Depth=1 ; RV32IA-NEXT: xori a0, a0, 1 ; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: bnez a0, .LBB215_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB215_1 Depth=1 +; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: mv a2, s2 ; RV32IA-NEXT: .LBB215_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB215_8 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB215_1 Depth=1 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB215_8: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB215_1 Depth=1 ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -17087,7 +16927,7 @@ ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB215_1 -; RV32IA-NEXT: # %bb.9: # %atomicrmw.end +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -17169,19 +17009,14 @@ ; RV32I-NEXT: # in Loop: Header=BB216_1 Depth=1 ; RV32I-NEXT: xori a0, a0, 1 ; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: bnez a0, .LBB216_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB216_1 Depth=1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB216_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB216_8 -; RV32I-NEXT: # %bb.7: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB216_8: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB216_1 Depth=1 ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -17191,7 +17026,7 @@ ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) ; RV32I-NEXT: beqz a0, .LBB216_1 -; RV32I-NEXT: # %bb.9: # %atomicrmw.end +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -17228,19 +17063,14 @@ ; RV32IA-NEXT: # in Loop: Header=BB216_1 Depth=1 ; RV32IA-NEXT: xori a0, a0, 1 ; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: bnez a0, .LBB216_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB216_1 Depth=1 +; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: mv a2, s2 ; RV32IA-NEXT: .LBB216_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB216_8 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB216_1 Depth=1 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB216_8: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB216_1 Depth=1 ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -17250,7 +17080,7 @@ ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB216_1 -; RV32IA-NEXT: # %bb.9: # %atomicrmw.end +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -17332,19 +17162,14 @@ ; RV32I-NEXT: # in Loop: Header=BB217_1 Depth=1 ; RV32I-NEXT: xori a0, a0, 1 ; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: bnez a0, .LBB217_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB217_1 Depth=1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB217_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB217_8 -; RV32I-NEXT: # %bb.7: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB217_8: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB217_1 Depth=1 ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -17354,7 +17179,7 @@ ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) ; RV32I-NEXT: beqz a0, .LBB217_1 -; RV32I-NEXT: # %bb.9: # %atomicrmw.end +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -17391,19 +17216,14 @@ ; RV32IA-NEXT: # in Loop: Header=BB217_1 Depth=1 ; RV32IA-NEXT: xori a0, a0, 1 ; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: bnez a0, .LBB217_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB217_1 Depth=1 +; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: mv a2, s2 ; RV32IA-NEXT: .LBB217_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB217_8 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB217_1 Depth=1 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB217_8: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB217_1 Depth=1 ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -17413,7 +17233,7 @@ ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB217_1 -; RV32IA-NEXT: # %bb.9: # %atomicrmw.end +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -17495,19 +17315,14 @@ ; RV32I-NEXT: # in Loop: Header=BB218_1 Depth=1 ; RV32I-NEXT: xori a0, a0, 1 ; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: bnez a0, .LBB218_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB218_1 Depth=1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB218_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB218_8 -; RV32I-NEXT: # %bb.7: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB218_8: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB218_1 Depth=1 ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -17517,7 +17332,7 @@ ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) ; RV32I-NEXT: beqz a0, .LBB218_1 -; RV32I-NEXT: # %bb.9: # %atomicrmw.end +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -17554,19 +17369,14 @@ ; RV32IA-NEXT: # in Loop: Header=BB218_1 Depth=1 ; RV32IA-NEXT: xori a0, a0, 1 ; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: bnez a0, .LBB218_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB218_1 Depth=1 +; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: mv a2, s2 ; RV32IA-NEXT: .LBB218_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB218_8 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB218_1 Depth=1 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB218_8: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB218_1 Depth=1 ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -17576,7 +17386,7 @@ ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB218_1 -; RV32IA-NEXT: # %bb.9: # %atomicrmw.end +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) @@ -17658,19 +17468,14 @@ ; RV32I-NEXT: # in Loop: Header=BB219_1 Depth=1 ; RV32I-NEXT: xori a0, a0, 1 ; RV32I-NEXT: sw a2, 0(sp) +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: bnez a0, .LBB219_6 ; RV32I-NEXT: # %bb.5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB219_1 Depth=1 +; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: mv a2, s2 ; RV32I-NEXT: .LBB219_6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: bnez a0, .LBB219_8 -; RV32I-NEXT: # %bb.7: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: .LBB219_8: # %atomicrmw.start -; RV32I-NEXT: # in Loop: Header=BB219_1 Depth=1 ; RV32I-NEXT: sw a1, 4(sp) ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: mv a1, s3 @@ -17680,7 +17485,7 @@ ; RV32I-NEXT: lw a1, 4(sp) ; RV32I-NEXT: lw a2, 0(sp) ; RV32I-NEXT: beqz a0, .LBB219_1 -; RV32I-NEXT: # %bb.9: # %atomicrmw.end +; RV32I-NEXT: # %bb.7: # %atomicrmw.end ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: lw s3, 12(sp) ; RV32I-NEXT: lw s2, 16(sp) @@ -17717,19 +17522,14 @@ ; RV32IA-NEXT: # in Loop: Header=BB219_1 Depth=1 ; RV32IA-NEXT: xori a0, a0, 1 ; RV32IA-NEXT: sw a2, 0(sp) +; RV32IA-NEXT: mv a3, a1 ; RV32IA-NEXT: bnez a0, .LBB219_6 ; RV32IA-NEXT: # %bb.5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB219_1 Depth=1 +; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: mv a2, s2 ; RV32IA-NEXT: .LBB219_6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV32IA-NEXT: mv a3, a1 -; RV32IA-NEXT: bnez a0, .LBB219_8 -; RV32IA-NEXT: # %bb.7: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB219_1 Depth=1 -; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: .LBB219_8: # %atomicrmw.start -; RV32IA-NEXT: # in Loop: Header=BB219_1 Depth=1 ; RV32IA-NEXT: sw a1, 4(sp) ; RV32IA-NEXT: mv a0, s1 ; RV32IA-NEXT: mv a1, s3 @@ -17739,7 +17539,7 @@ ; RV32IA-NEXT: lw a1, 4(sp) ; RV32IA-NEXT: lw a2, 0(sp) ; RV32IA-NEXT: beqz a0, .LBB219_1 -; RV32IA-NEXT: # %bb.9: # %atomicrmw.end +; RV32IA-NEXT: # %bb.7: # %atomicrmw.end ; RV32IA-NEXT: mv a0, a2 ; RV32IA-NEXT: lw s3, 12(sp) ; RV32IA-NEXT: lw s2, 16(sp) Index: test/CodeGen/RISCV/select-optimize-multiple.ll =================================================================== --- /dev/null +++ test/CodeGen/RISCV/select-optimize-multiple.ll @@ -0,0 +1,323 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv64 -mattr=+d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I + +; Selects of wide values are split into two selects, which can easily cause +; unnecessary control flow. Here we check some cases where we can currently +; emit a sequence of selects with shared control flow. + +define i64 @cmovcc64(i32 signext %a, i64 %b, i64 %c) nounwind { +; RV32I-LABEL: cmovcc64: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi a5, zero, 123 +; RV32I-NEXT: beq a0, a5, .LBB0_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: .LBB0_2: # %entry +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: cmovcc64: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi a3, zero, 123 +; RV64I-NEXT: beq a0, a3, .LBB0_2 +; RV64I-NEXT: # %bb.1: # %entry +; RV64I-NEXT: mv a1, a2 +; RV64I-NEXT: .LBB0_2: # %entry +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: ret +entry: + %cmp = icmp eq i32 %a, 123 + %cond = select i1 %cmp, i64 %b, i64 %c + ret i64 %cond +} + +define i128 @cmovcc128(i64 signext %a, i128 %b, i128 %c) nounwind { +; RV32I-LABEL: cmovcc128: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: xori a1, a1, 123 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: beqz a1, .LBB1_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: addi a1, a4, 4 +; RV32I-NEXT: addi a2, a4, 8 +; RV32I-NEXT: addi a5, a4, 12 +; RV32I-NEXT: mv a3, a4 +; RV32I-NEXT: j .LBB1_3 +; RV32I-NEXT: .LBB1_2: +; RV32I-NEXT: addi a1, a3, 4 +; RV32I-NEXT: addi a2, a3, 8 +; RV32I-NEXT: addi a5, a3, 12 +; RV32I-NEXT: .LBB1_3: # %entry +; RV32I-NEXT: lw a4, 0(a5) +; RV32I-NEXT: sw a4, 12(a0) +; RV32I-NEXT: lw a2, 0(a2) +; RV32I-NEXT: sw a2, 8(a0) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: sw a1, 4(a0) +; RV32I-NEXT: lw a1, 0(a3) +; RV32I-NEXT: sw a1, 0(a0) +; RV32I-NEXT: ret +; +; RV64I-LABEL: cmovcc128: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi a5, zero, 123 +; RV64I-NEXT: beq a0, a5, .LBB1_2 +; RV64I-NEXT: # %bb.1: # %entry +; RV64I-NEXT: mv a2, a4 +; RV64I-NEXT: mv a1, a3 +; RV64I-NEXT: .LBB1_2: # %entry +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv a1, a2 +; RV64I-NEXT: ret +entry: + %cmp = icmp eq i64 %a, 123 + %cond = select i1 %cmp, i128 %b, i128 %c + ret i128 %cond +} + +define i64 @cmov64(i1 %a, i64 %b, i64 %c) nounwind { +; RV32I-LABEL: cmov64: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: bnez a0, .LBB2_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: mv a2, a4 +; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: .LBB2_2: # %entry +; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: cmov64: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: andi a0, a0, 1 +; RV64I-NEXT: bnez a0, .LBB2_2 +; RV64I-NEXT: # %bb.1: # %entry +; RV64I-NEXT: mv a1, a2 +; RV64I-NEXT: .LBB2_2: # %entry +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: ret +entry: + %cond = select i1 %a, i64 %b, i64 %c + ret i64 %cond +} + +define i128 @cmov128(i1 %a, i128 %b, i128 %c) nounwind { +; RV32I-LABEL: cmov128: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: andi a1, a1, 1 +; RV32I-NEXT: bnez a1, .LBB3_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: addi a1, a3, 4 +; RV32I-NEXT: addi a4, a3, 8 +; RV32I-NEXT: addi a5, a3, 12 +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: j .LBB3_3 +; RV32I-NEXT: .LBB3_2: +; RV32I-NEXT: addi a1, a2, 4 +; RV32I-NEXT: addi a4, a2, 8 +; RV32I-NEXT: addi a5, a2, 12 +; RV32I-NEXT: .LBB3_3: # %entry +; RV32I-NEXT: lw a3, 0(a5) +; RV32I-NEXT: sw a3, 12(a0) +; RV32I-NEXT: lw a3, 0(a4) +; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: lw a1, 0(a1) +; RV32I-NEXT: sw a1, 4(a0) +; RV32I-NEXT: lw a1, 0(a2) +; RV32I-NEXT: sw a1, 0(a0) +; RV32I-NEXT: ret +; +; RV64I-LABEL: cmov128: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: andi a0, a0, 1 +; RV64I-NEXT: bnez a0, .LBB3_2 +; RV64I-NEXT: # %bb.1: # %entry +; RV64I-NEXT: mv a2, a4 +; RV64I-NEXT: mv a1, a3 +; RV64I-NEXT: .LBB3_2: # %entry +; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv a1, a2 +; RV64I-NEXT: ret +entry: + %cond = select i1 %a, i128 %b, i128 %c + ret i128 %cond +} + +define float @cmovfloat(i1 %a, float %b, float %c, float %d, float %e) nounwind { +; RV32I-LABEL: cmovfloat: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: bnez a0, .LBB4_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: fmv.w.x ft0, a4 +; RV32I-NEXT: fmv.w.x ft1, a2 +; RV32I-NEXT: j .LBB4_3 +; RV32I-NEXT: .LBB4_2: +; RV32I-NEXT: fmv.w.x ft0, a3 +; RV32I-NEXT: fmv.w.x ft1, a1 +; RV32I-NEXT: .LBB4_3: # %entry +; RV32I-NEXT: fadd.s ft0, ft1, ft0 +; RV32I-NEXT: fmv.x.w a0, ft0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: cmovfloat: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: andi a0, a0, 1 +; RV64I-NEXT: bnez a0, .LBB4_2 +; RV64I-NEXT: # %bb.1: # %entry +; RV64I-NEXT: fmv.w.x ft0, a4 +; RV64I-NEXT: fmv.w.x ft1, a2 +; RV64I-NEXT: j .LBB4_3 +; RV64I-NEXT: .LBB4_2: +; RV64I-NEXT: fmv.w.x ft0, a3 +; RV64I-NEXT: fmv.w.x ft1, a1 +; RV64I-NEXT: .LBB4_3: # %entry +; RV64I-NEXT: fadd.s ft0, ft1, ft0 +; RV64I-NEXT: fmv.x.w a0, ft0 +; RV64I-NEXT: ret +entry: + %cond1 = select i1 %a, float %b, float %c + %cond2 = select i1 %a, float %d, float %e + %ret = fadd float %cond1, %cond2 + ret float %ret +} + +define double @cmovdouble(i1 %a, double %b, double %c) nounwind { +; RV32I-LABEL: cmovdouble: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw a3, 8(sp) +; RV32I-NEXT: sw a4, 12(sp) +; RV32I-NEXT: fld ft0, 8(sp) +; RV32I-NEXT: sw a1, 8(sp) +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: fld ft1, 8(sp) +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: bnez a0, .LBB5_2 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: fmv.d ft1, ft0 +; RV32I-NEXT: .LBB5_2: # %entry +; RV32I-NEXT: fsd ft1, 8(sp) +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: cmovdouble: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: andi a0, a0, 1 +; RV64I-NEXT: bnez a0, .LBB5_2 +; RV64I-NEXT: # %bb.1: # %entry +; RV64I-NEXT: fmv.d.x ft0, a2 +; RV64I-NEXT: fmv.x.d a0, ft0 +; RV64I-NEXT: ret +; RV64I-NEXT: .LBB5_2: +; RV64I-NEXT: fmv.d.x ft0, a1 +; RV64I-NEXT: fmv.x.d a0, ft0 +; RV64I-NEXT: ret +entry: + %cond = select i1 %a, double %b, double %c + ret double %cond +} + +; Check that selects with dependencies on previous ones aren't incorrectly +; optimized. + +define i32 @cmovccdep(i32 signext %a, i32 %b, i32 %c, i32 %d) nounwind { +; RV32I-LABEL: cmovccdep: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi a4, zero, 123 +; RV32I-NEXT: bne a0, a4, .LBB6_3 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: bne a0, a4, .LBB6_4 +; RV32I-NEXT: .LBB6_2: # %entry +; RV32I-NEXT: add a0, a1, a2 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB6_3: # %entry +; RV32I-NEXT: mv a1, a2 +; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: beq a0, a4, .LBB6_2 +; RV32I-NEXT: .LBB6_4: # %entry +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: add a0, a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: cmovccdep: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi a4, zero, 123 +; RV64I-NEXT: bne a0, a4, .LBB6_3 +; RV64I-NEXT: # %bb.1: # %entry +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: bne a0, a4, .LBB6_4 +; RV64I-NEXT: .LBB6_2: # %entry +; RV64I-NEXT: add a0, a1, a2 +; RV64I-NEXT: ret +; RV64I-NEXT: .LBB6_3: # %entry +; RV64I-NEXT: mv a1, a2 +; RV64I-NEXT: mv a2, a1 +; RV64I-NEXT: beq a0, a4, .LBB6_2 +; RV64I-NEXT: .LBB6_4: # %entry +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: add a0, a1, a2 +; RV64I-NEXT: ret +entry: + %cmp = icmp eq i32 %a, 123 + %cond1 = select i1 %cmp, i32 %b, i32 %c + %cond2 = select i1 %cmp, i32 %cond1, i32 %d + %ret = add i32 %cond1, %cond2 + ret i32 %ret +} + +; Check that selects with different conditions aren't incorrectly optimized. + +define i32 @cmovdiffcc(i1 %a, i1 %b, i32 %c, i32 %d, i32 %e, i32 %f) nounwind { +; RV32I-LABEL: cmovdiffcc: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: andi a1, a1, 1 +; RV32I-NEXT: beqz a1, .LBB7_3 +; RV32I-NEXT: # %bb.1: # %entry +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: beqz a0, .LBB7_4 +; RV32I-NEXT: .LBB7_2: # %entry +; RV32I-NEXT: add a0, a2, a4 +; RV32I-NEXT: ret +; RV32I-NEXT: .LBB7_3: # %entry +; RV32I-NEXT: mv a4, a5 +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: bnez a0, .LBB7_2 +; RV32I-NEXT: .LBB7_4: # %entry +; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: add a0, a2, a4 +; RV32I-NEXT: ret +; +; RV64I-LABEL: cmovdiffcc: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: andi a1, a1, 1 +; RV64I-NEXT: beqz a1, .LBB7_3 +; RV64I-NEXT: # %bb.1: # %entry +; RV64I-NEXT: andi a0, a0, 1 +; RV64I-NEXT: beqz a0, .LBB7_4 +; RV64I-NEXT: .LBB7_2: # %entry +; RV64I-NEXT: add a0, a2, a4 +; RV64I-NEXT: ret +; RV64I-NEXT: .LBB7_3: # %entry +; RV64I-NEXT: mv a4, a5 +; RV64I-NEXT: andi a0, a0, 1 +; RV64I-NEXT: bnez a0, .LBB7_2 +; RV64I-NEXT: .LBB7_4: # %entry +; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: add a0, a2, a4 +; RV64I-NEXT: ret +entry: + %cond1 = select i1 %a, i32 %c, i32 %d + %cond2 = select i1 %b, i32 %e, i32 %f + %ret = add i32 %cond1, %cond2 + ret i32 %ret +} Index: test/CodeGen/RISCV/select-optimize-multiple.mir =================================================================== --- /dev/null +++ test/CodeGen/RISCV/select-optimize-multiple.mir @@ -0,0 +1,191 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -run-pass=expand-isel-pseudos -simplify-mir -o - %s \ +# RUN: | FileCheck -check-prefix=RV32I %s +# RUN: llc -mtriple=riscv64 -run-pass=expand-isel-pseudos -simplify-mir -o - %s \ +# RUN: | FileCheck -check-prefix=RV64I %s + +# Provide dummy definitions of functions and just enough metadata to create a +# DBG_VALUE. +--- | + define void @cmov_interleaved_bad() { + ret void + } + define void @cmov_interleaved_debug_value() { + ret void + } + !1 = !DIExpression() +... +--- +# Here we have a sequence of select instructions with a non-select instruction +# in the middle. Because the non-select depends on the result of a previous +# select, we cannot optimize the sequence to share control-flow. +name: cmov_interleaved_bad +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } + - { id: 3, class: gpr } + - { id: 4, class: gpr } + - { id: 5, class: gpr } + - { id: 6, class: gpr } + - { id: 7, class: gpr } + - { id: 8, class: gpr } + - { id: 9, class: gpr } + - { id: 10, class: gpr } +liveins: + - { reg: '$x10', virtual-reg: '%0' } + - { reg: '$x11', virtual-reg: '%1' } + - { reg: '$x12', virtual-reg: '%2' } + - { reg: '$x13', virtual-reg: '%3' } +body: | + bb.0: + liveins: $x10, $x11, $x12, $x13 + + ; RV32I-LABEL: name: cmov_interleaved_bad + ; RV32I: successors: %bb.1, %bb.2 + ; RV32I: liveins: $x10, $x11, $x12, $x13 + ; RV32I: [[COPY:%[0-9]+]]:gpr = COPY $x13 + ; RV32I: [[COPY1:%[0-9]+]]:gpr = COPY $x12 + ; RV32I: [[COPY2:%[0-9]+]]:gpr = COPY $x11 + ; RV32I: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; RV32I: [[ANDI:%[0-9]+]]:gpr = ANDI [[COPY3]], 1 + ; RV32I: [[COPY4:%[0-9]+]]:gpr = COPY $x0 + ; RV32I: BNE [[ANDI]], [[COPY4]], %bb.2 + ; RV32I: .1: + ; RV32I: .2: + ; RV32I: successors: %bb.3, %bb.4 + ; RV32I: [[PHI:%[0-9]+]]:gpr = PHI [[COPY2]], %bb.0, [[COPY1]], %bb.1 + ; RV32I: [[ADDI:%[0-9]+]]:gpr = ADDI [[PHI]], 1 + ; RV32I: BNE [[ANDI]], [[COPY4]], %bb.4 + ; RV32I: .3: + ; RV32I: .4: + ; RV32I: [[PHI1:%[0-9]+]]:gpr = PHI [[COPY]], %bb.2, [[COPY1]], %bb.3 + ; RV32I: [[ADD:%[0-9]+]]:gpr = ADD [[PHI]], killed [[PHI1]] + ; RV32I: $x10 = COPY [[ADD]] + ; RV32I: PseudoRET implicit $x10 + ; RV64I-LABEL: name: cmov_interleaved_bad + ; RV64I: successors: %bb.1, %bb.2 + ; RV64I: liveins: $x10, $x11, $x12, $x13 + ; RV64I: [[COPY:%[0-9]+]]:gpr = COPY $x13 + ; RV64I: [[COPY1:%[0-9]+]]:gpr = COPY $x12 + ; RV64I: [[COPY2:%[0-9]+]]:gpr = COPY $x11 + ; RV64I: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; RV64I: [[ANDI:%[0-9]+]]:gpr = ANDI [[COPY3]], 1 + ; RV64I: [[COPY4:%[0-9]+]]:gpr = COPY $x0 + ; RV64I: BNE [[ANDI]], [[COPY4]], %bb.2 + ; RV64I: .1: + ; RV64I: .2: + ; RV64I: successors: %bb.3, %bb.4 + ; RV64I: [[PHI:%[0-9]+]]:gpr = PHI [[COPY2]], %bb.0, [[COPY1]], %bb.1 + ; RV64I: [[ADDI:%[0-9]+]]:gpr = ADDI [[PHI]], 1 + ; RV64I: BNE [[ANDI]], [[COPY4]], %bb.4 + ; RV64I: .3: + ; RV64I: .4: + ; RV64I: [[PHI1:%[0-9]+]]:gpr = PHI [[COPY]], %bb.2, [[COPY1]], %bb.3 + ; RV64I: [[ADD:%[0-9]+]]:gpr = ADD [[PHI]], killed [[PHI1]] + ; RV64I: $x10 = COPY [[ADD]] + ; RV64I: PseudoRET implicit $x10 + %3:gpr = COPY $x13 + %2:gpr = COPY $x12 + %1:gpr = COPY $x11 + %0:gpr = COPY $x10 + %5:gpr = ANDI %0, 1 + %6:gpr = COPY $x0 + %7:gpr = Select_GPR_Using_CC_GPR %5, %6, 22, %1, %2 + %8:gpr = ADDI %7, 1 + %9:gpr = Select_GPR_Using_CC_GPR %5, %6, 22, %3, %2 + %10:gpr = ADD %7, killed %9 + $x10 = COPY %10 + PseudoRET implicit $x10 + +... +--- +# Demonstrate that debug info associated with selects is correctly moved to +# the tail basic block, while debug info associated with non-selects is left +# in the head basic block. +name: cmov_interleaved_debug_value +alignment: 2 +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } + - { id: 3, class: gpr } + - { id: 4, class: gpr } + - { id: 5, class: gpr } + - { id: 6, class: gpr } + - { id: 7, class: gpr } + - { id: 8, class: gpr } + - { id: 9, class: gpr } + - { id: 10, class: gpr } +liveins: + - { reg: '$x10', virtual-reg: '%0' } + - { reg: '$x11', virtual-reg: '%1' } + - { reg: '$x12', virtual-reg: '%2' } + - { reg: '$x13', virtual-reg: '%3' } +body: | + bb.0: + liveins: $x10, $x11, $x12, $x13 + + ; RV32I-LABEL: name: cmov_interleaved_debug_value + ; RV32I: successors: %bb.1, %bb.2 + ; RV32I: liveins: $x10, $x11, $x12, $x13 + ; RV32I: [[COPY:%[0-9]+]]:gpr = COPY $x13 + ; RV32I: [[COPY1:%[0-9]+]]:gpr = COPY $x12 + ; RV32I: [[COPY2:%[0-9]+]]:gpr = COPY $x11 + ; RV32I: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; RV32I: [[ANDI:%[0-9]+]]:gpr = ANDI [[COPY3]], 1 + ; RV32I: [[COPY4:%[0-9]+]]:gpr = COPY $x0 + ; RV32I: [[ADDI:%[0-9]+]]:gpr = ADDI [[COPY3]], 1 + ; RV32I: DBG_VALUE [[ADDI]], $noreg, !DIExpression(), !DIExpression() + ; RV32I: BNE [[ANDI]], [[COPY4]], %bb.2 + ; RV32I: .1: + ; RV32I: .2: + ; RV32I: [[PHI:%[0-9]+]]:gpr = PHI [[COPY]], %bb.0, [[COPY1]], %bb.1 + ; RV32I: [[PHI1:%[0-9]+]]:gpr = PHI [[COPY2]], %bb.0, [[COPY1]], %bb.1 + ; RV32I: DBG_VALUE [[PHI1]], $noreg, !DIExpression(), !DIExpression() + ; RV32I: DBG_VALUE [[PHI]], $noreg, !DIExpression(), !DIExpression() + ; RV32I: [[ADD:%[0-9]+]]:gpr = ADD [[PHI1]], killed [[PHI]] + ; RV32I: $x10 = COPY [[ADD]] + ; RV32I: PseudoRET implicit $x10 + ; RV64I-LABEL: name: cmov_interleaved_debug_value + ; RV64I: successors: %bb.1, %bb.2 + ; RV64I: liveins: $x10, $x11, $x12, $x13 + ; RV64I: [[COPY:%[0-9]+]]:gpr = COPY $x13 + ; RV64I: [[COPY1:%[0-9]+]]:gpr = COPY $x12 + ; RV64I: [[COPY2:%[0-9]+]]:gpr = COPY $x11 + ; RV64I: [[COPY3:%[0-9]+]]:gpr = COPY $x10 + ; RV64I: [[ANDI:%[0-9]+]]:gpr = ANDI [[COPY3]], 1 + ; RV64I: [[COPY4:%[0-9]+]]:gpr = COPY $x0 + ; RV64I: [[ADDI:%[0-9]+]]:gpr = ADDI [[COPY3]], 1 + ; RV64I: DBG_VALUE [[ADDI]], $noreg, !DIExpression(), !DIExpression() + ; RV64I: BNE [[ANDI]], [[COPY4]], %bb.2 + ; RV64I: .1: + ; RV64I: .2: + ; RV64I: [[PHI:%[0-9]+]]:gpr = PHI [[COPY]], %bb.0, [[COPY1]], %bb.1 + ; RV64I: [[PHI1:%[0-9]+]]:gpr = PHI [[COPY2]], %bb.0, [[COPY1]], %bb.1 + ; RV64I: DBG_VALUE [[PHI1]], $noreg, !DIExpression(), !DIExpression() + ; RV64I: DBG_VALUE [[PHI]], $noreg, !DIExpression(), !DIExpression() + ; RV64I: [[ADD:%[0-9]+]]:gpr = ADD [[PHI1]], killed [[PHI]] + ; RV64I: $x10 = COPY [[ADD]] + ; RV64I: PseudoRET implicit $x10 + %3:gpr = COPY $x13 + %2:gpr = COPY $x12 + %1:gpr = COPY $x11 + %0:gpr = COPY $x10 + %5:gpr = ANDI %0, 1 + %6:gpr = COPY $x0 + %7:gpr = Select_GPR_Using_CC_GPR %5, %6, 22, %1, %2 + DBG_VALUE %7, $noreg, !1, !1 + %8:gpr = ADDI %0, 1 + DBG_VALUE %8, $noreg, !1, !1 + %9:gpr = Select_GPR_Using_CC_GPR %5, %6, 22, %3, %2 + DBG_VALUE %9, $noreg, !1, !1 + %10:gpr = ADD %7, killed %9 + $x10 = COPY %10 + PseudoRET implicit $x10 + +... +---